gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2019 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33       Note the interesting uses are categorized and handled in group.
  34       Generally, address type uses are grouped together if their iv bases
  35       are different in constant offset.
  36
  37    2) Candidates for the induction variables are found.  This includes
  38
  39       -- old induction variables
  40       -- the variables defined by expressions derived from the "interesting
  41          groups/uses" above
  42
  43    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  44       cost function assigns a cost to sets of induction variables and consists
  45       of three parts:
  46
  47       -- The group/use costs.  Each of the interesting groups/uses chooses
  48          the best induction variable in the set and adds its cost to the sum.
  49          The cost reflects the time spent on modifying the induction variables
  50          value to be usable for the given purpose (adding base and offset for
  51          arrays, etc.).
  52       -- The variable costs.  Each of the variables has a cost assigned that
  53          reflects the costs associated with incrementing the value of the
  54          variable.  The original variables are somewhat preferred.
  55       -- The set cost.  Depending on the size of the set, extra cost may be
  56          added to reflect register pressure.
  57
  58       All the costs are defined in a machine-specific way, using the target
  59       hooks and machine descriptions to determine them.
  60
  61    4) The trees are transformed to use the new variables, the dead code is
  62       removed.
  63
  64    All of this is done loop by loop.  Doing it globally is theoretically
  65    possible, it might give a better performance and it might enable us
  66    to decide costs more precisely, but getting all the interactions right
  67    would be complicated.  */
  68
  69 #include "config.h"
  70 #include "system.h"
  71 #include "coretypes.h"
  72 #include "backend.h"
  73 #include "rtl.h"
  74 #include "tree.h"
  75 #include "gimple.h"
  76 #include "cfghooks.h"
  77 #include "tree-pass.h"
  78 #include "memmodel.h"
  79 #include "tm_p.h"
  80 #include "ssa.h"
  81 #include "expmed.h"
  82 #include "insn-config.h"
  83 #include "emit-rtl.h"
  84 #include "recog.h"
  85 #include "cgraph.h"
  86 #include "gimple-pretty-print.h"
  87 #include "alias.h"
  88 #include "fold-const.h"
  89 #include "stor-layout.h"
  90 #include "tree-eh.h"
  91 #include "gimplify.h"
  92 #include "gimple-iterator.h"
  93 #include "gimplify-me.h"
  94 #include "tree-cfg.h"
  95 #include "tree-ssa-loop-ivopts.h"
  96 #include "tree-ssa-loop-manip.h"
  97 #include "tree-ssa-loop-niter.h"
  98 #include "tree-ssa-loop.h"
  99 #include "explow.h"
 100 #include "expr.h"
 101 #include "tree-dfa.h"
 102 #include "tree-ssa.h"
 103 #include "cfgloop.h"
 104 #include "tree-scalar-evolution.h"
 105 #include "params.h"
 106 #include "tree-affine.h"
 107 #include "tree-ssa-propagate.h"
 108 #include "tree-ssa-address.h"
 109 #include "builtins.h"
 110 #include "tree-vectorizer.h"
 111
 112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 113    cost of different addressing modes.  This should be moved to a TBD
 114    interface between the GIMPLE and RTL worlds.  */
 115
 116 /* The infinite cost.  */
 117 #define INFTY 1000000000
 118
 119 /* Returns the expected number of loop iterations for LOOP.
 120    The average trip count is computed from profile data if it
 121    exists. */
 122
 123 static inline HOST_WIDE_INT
 124 avg_loop_niter (struct loop *loop)
 125 {
 126   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 127   if (niter == -1)
 128     {
 129       niter = likely_max_stmt_executions_int (loop);
 130
 131       if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
 132         return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
 133     }
 134
 135   return niter;
 136 }
 137
 138 struct iv_use;
 139
 140 /* Representation of the induction variable.  */
 141 struct iv
 142 {
 143   tree base;            /* Initial value of the iv.  */
 144   tree base_object;     /* A memory object to that the induction variable points.  */
 145   tree step;            /* Step of the iv (constant only).  */
 146   tree ssa_name;        /* The ssa name with the value.  */
 147   struct iv_use *nonlin_use;    /* The identifier in the use if it is the case.  */
 148   bool biv_p;           /* Is it a biv?  */
 149   bool no_overflow;     /* True if the iv doesn't overflow.  */
 150   bool have_address_use;/* For biv, indicate if it's used in any address
 151                            type use.  */
 152 };
 153
 154 /* Per-ssa version information (induction variable descriptions, etc.).  */
 155 struct version_info
 156 {
 157   tree name;            /* The ssa name.  */
 158   struct iv *iv;        /* Induction variable description.  */
 159   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 160                            an expression that is not an induction variable.  */
 161   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 162   unsigned inv_id;      /* Id of an invariant.  */
 163 };
 164
 165 /* Types of uses.  */
 166 enum use_type
 167 {
 168   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 169   USE_REF_ADDRESS,      /* Use is an address for an explicit memory
 170                            reference.  */
 171   USE_PTR_ADDRESS,      /* Use is a pointer argument to a function in
 172                            cases where the expansion of the function
 173                            will turn the argument into a normal address.  */
 174   USE_COMPARE           /* Use is a compare.  */
 175 };
 176
 177 /* Cost of a computation.  */
 178 struct comp_cost
 179 {
 180   comp_cost (): cost (0), complexity (0), scratch (0)
 181   {}
 182
 183   comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
 184     : cost (cost), complexity (complexity), scratch (scratch)
 185   {}
 186
 187   /* Returns true if COST is infinite.  */
 188   bool infinite_cost_p ();
 189
 190   /* Adds costs COST1 and COST2.  */
 191   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
 192
 193   /* Adds COST to the comp_cost.  */
 194   comp_cost operator+= (comp_cost cost);
 195
 196   /* Adds constant C to this comp_cost.  */
 197   comp_cost operator+= (HOST_WIDE_INT c);
 198
 199   /* Subtracts constant C to this comp_cost.  */
 200   comp_cost operator-= (HOST_WIDE_INT c);
 201
 202   /* Divide the comp_cost by constant C.  */
 203   comp_cost operator/= (HOST_WIDE_INT c);
 204
 205   /* Multiply the comp_cost by constant C.  */
 206   comp_cost operator*= (HOST_WIDE_INT c);
 207
 208   /* Subtracts costs COST1 and COST2.  */
 209   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
 210
 211   /* Subtracts COST from this comp_cost.  */
 212   comp_cost operator-= (comp_cost cost);
 213
 214   /* Returns true if COST1 is smaller than COST2.  */
 215   friend bool operator< (comp_cost cost1, comp_cost cost2);
 216
 217   /* Returns true if COST1 and COST2 are equal.  */
 218   friend bool operator== (comp_cost cost1, comp_cost cost2);
 219
 220   /* Returns true if COST1 is smaller or equal than COST2.  */
 221   friend bool operator<= (comp_cost cost1, comp_cost cost2);
 222
 223   int64_t cost;         /* The runtime cost.  */
 224   unsigned complexity;  /* The estimate of the complexity of the code for
 225                            the computation (in no concrete units --
 226                            complexity field should be larger for more
 227                            complex expressions and addressing modes).  */
 228   int64_t scratch;      /* Scratch used during cost computation.  */
 229 };
 230
 231 static const comp_cost no_cost;
 232 static const comp_cost infinite_cost (INFTY, 0, INFTY);
 233
 234 bool
 235 comp_cost::infinite_cost_p ()
 236 {
 237   return cost == INFTY;
 238 }
 239
 240 comp_cost
 241 operator+ (comp_cost cost1, comp_cost cost2)
 242 {
 243   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
 244     return infinite_cost;
 245
 246   gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
 247   cost1.cost += cost2.cost;
 248   cost1.complexity += cost2.complexity;
 249
 250   return cost1;
 251 }
 252
 253 comp_cost
 254 operator- (comp_cost cost1, comp_cost cost2)
 255 {
 256   if (cost1.infinite_cost_p ())
 257     return infinite_cost;
 258
 259   gcc_assert (!cost2.infinite_cost_p ());
 260   gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
 261
 262   cost1.cost -= cost2.cost;
 263   cost1.complexity -= cost2.complexity;
 264
 265   return cost1;
 266 }
 267
 268 comp_cost
 269 comp_cost::operator+= (comp_cost cost)
 270 {
 271   *this = *this + cost;
 272   return *this;
 273 }
 274
 275 comp_cost
 276 comp_cost::operator+= (HOST_WIDE_INT c)
 277 {
 278   if (infinite_cost_p ())
 279     return *this;
 280
 281   gcc_assert (this->cost + c < infinite_cost.cost);
 282   this->cost += c;
 283
 284   return *this;
 285 }
 286
 287 comp_cost
 288 comp_cost::operator-= (HOST_WIDE_INT c)
 289 {
 290   if (infinite_cost_p ())
 291     return *this;
 292
 293   gcc_assert (this->cost - c < infinite_cost.cost);
 294   this->cost -= c;
 295
 296   return *this;
 297 }
 298
 299 comp_cost
 300 comp_cost::operator/= (HOST_WIDE_INT c)
 301 {
 302   gcc_assert (c != 0);
 303   if (infinite_cost_p ())
 304     return *this;
 305
 306   this->cost /= c;
 307
 308   return *this;
 309 }
 310
 311 comp_cost
 312 comp_cost::operator*= (HOST_WIDE_INT c)
 313 {
 314   if (infinite_cost_p ())
 315     return *this;
 316
 317   gcc_assert (this->cost * c < infinite_cost.cost);
 318   this->cost *= c;
 319
 320   return *this;
 321 }
 322
 323 comp_cost
 324 comp_cost::operator-= (comp_cost cost)
 325 {
 326   *this = *this - cost;
 327   return *this;
 328 }
 329
 330 bool
 331 operator< (comp_cost cost1, comp_cost cost2)
 332 {
 333   if (cost1.cost == cost2.cost)
 334     return cost1.complexity < cost2.complexity;
 335
 336   return cost1.cost < cost2.cost;
 337 }
 338
 339 bool
 340 operator== (comp_cost cost1, comp_cost cost2)
 341 {
 342   return cost1.cost == cost2.cost
 343     && cost1.complexity == cost2.complexity;
 344 }
 345
 346 bool
 347 operator<= (comp_cost cost1, comp_cost cost2)
 348 {
 349   return cost1 < cost2 || cost1 == cost2;
 350 }
 351
 352 struct iv_inv_expr_ent;
 353
 354 /* The candidate - cost pair.  */
 355 struct cost_pair
 356 {
 357   struct iv_cand *cand; /* The candidate.  */
 358   comp_cost cost;       /* The cost.  */
 359   enum tree_code comp;  /* For iv elimination, the comparison.  */
 360   bitmap inv_vars;      /* The list of invariant ssa_vars that have to be
 361                            preserved when representing iv_use with iv_cand.  */
 362   bitmap inv_exprs;     /* The list of newly created invariant expressions
 363                            when representing iv_use with iv_cand.  */
 364   tree value;           /* For final value elimination, the expression for
 365                            the final value of the iv.  For iv elimination,
 366                            the new bound to compare with.  */
 367 };
 368
 369 /* Use.  */
 370 struct iv_use
 371 {
 372   unsigned id;          /* The id of the use.  */
 373   unsigned group_id;    /* The group id the use belongs to.  */
 374   enum use_type type;   /* Type of the use.  */
 375   tree mem_type;        /* The memory type to use when testing whether an
 376                            address is legitimate, and what the address's
 377                            cost is.  */
 378   struct iv *iv;        /* The induction variable it is based on.  */
 379   gimple *stmt;         /* Statement in that it occurs.  */
 380   tree *op_p;           /* The place where it occurs.  */
 381
 382   tree addr_base;       /* Base address with const offset stripped.  */
 383   poly_uint64_pod addr_offset;
 384                         /* Const offset stripped from base address.  */
 385 };
 386
 387 /* Group of uses.  */
 388 struct iv_group
 389 {
 390   /* The id of the group.  */
 391   unsigned id;
 392   /* Uses of the group are of the same type.  */
 393   enum use_type type;
 394   /* The set of "related" IV candidates, plus the important ones.  */
 395   bitmap related_cands;
 396   /* Number of IV candidates in the cost_map.  */
 397   unsigned n_map_members;
 398   /* The costs wrto the iv candidates.  */
 399   struct cost_pair *cost_map;
 400   /* The selected candidate for the group.  */
 401   struct iv_cand *selected;
 402   /* Uses in the group.  */
 403   vec<struct iv_use *> vuses;
 404 };
 405
 406 /* The position where the iv is computed.  */
 407 enum iv_position
 408 {
 409   IP_NORMAL,            /* At the end, just before the exit condition.  */
 410   IP_END,               /* At the end of the latch block.  */
 411   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 412   IP_AFTER_USE,         /* Immediately after a specific use.  */
 413   IP_ORIGINAL           /* The original biv.  */
 414 };
 415
 416 /* The induction variable candidate.  */
 417 struct iv_cand
 418 {
 419   unsigned id;          /* The number of the candidate.  */
 420   bool important;       /* Whether this is an "important" candidate, i.e. such
 421                            that it should be considered by all uses.  */
 422   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 423   gimple *incremented_at;/* For original biv, the statement where it is
 424                            incremented.  */
 425   tree var_before;      /* The variable used for it before increment.  */
 426   tree var_after;       /* The variable used for it after increment.  */
 427   struct iv *iv;        /* The value of the candidate.  NULL for
 428                            "pseudocandidate" used to indicate the possibility
 429                            to replace the final value of an iv by direct
 430                            computation of the value.  */
 431   unsigned cost;        /* Cost of the candidate.  */
 432   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 433   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 434                               where it is incremented.  */
 435   bitmap inv_vars;      /* The list of invariant ssa_vars used in step of the
 436                            iv_cand.  */
 437   bitmap inv_exprs;     /* If step is more complicated than a single ssa_var,
 438                            hanlde it as a new invariant expression which will
 439                            be hoisted out of loop.  */
 440   struct iv *orig_iv;   /* The original iv if this cand is added from biv with
 441                            smaller type.  */
 442 };
 443
 444 /* Hashtable entry for common candidate derived from iv uses.  */
 445 struct iv_common_cand
 446 {
 447   tree base;
 448   tree step;
 449   /* IV uses from which this common candidate is derived.  */
 450   auto_vec<struct iv_use *> uses;
 451   hashval_t hash;
 452 };
 453
 454 /* Hashtable helpers.  */
 455
 456 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
 457 {
 458   static inline hashval_t hash (const iv_common_cand *);
 459   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
 460 };
 461
 462 /* Hash function for possible common candidates.  */
 463
 464 inline hashval_t
 465 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
 466 {
 467   return ccand->hash;
 468 }
 469
 470 /* Hash table equality function for common candidates.  */
 471
 472 inline bool
 473 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
 474                               const iv_common_cand *ccand2)
 475 {
 476   return (ccand1->hash == ccand2->hash
 477           && operand_equal_p (ccand1->base, ccand2->base, 0)
 478           && operand_equal_p (ccand1->step, ccand2->step, 0)
 479           && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
 480               == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
 481 }
 482
 483 /* Loop invariant expression hashtable entry.  */
 484
 485 struct iv_inv_expr_ent
 486 {
 487   /* Tree expression of the entry.  */
 488   tree expr;
 489   /* Unique indentifier.  */
 490   int id;
 491   /* Hash value.  */
 492   hashval_t hash;
 493 };
 494
 495 /* Sort iv_inv_expr_ent pair A and B by id field.  */
 496
 497 static int
 498 sort_iv_inv_expr_ent (const void *a, const void *b)
 499 {
 500   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
 501   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
 502
 503   unsigned id1 = (*e1)->id;
 504   unsigned id2 = (*e2)->id;
 505
 506   if (id1 < id2)
 507     return -1;
 508   else if (id1 > id2)
 509     return 1;
 510   else
 511     return 0;
 512 }
 513
 514 /* Hashtable helpers.  */
 515
 516 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
 517 {
 518   static inline hashval_t hash (const iv_inv_expr_ent *);
 519   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 520 };
 521
 522 /* Return true if uses of type TYPE represent some form of address.  */
 523
 524 inline bool
 525 address_p (use_type type)
 526 {
 527   return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
 528 }
 529
 530 /* Hash function for loop invariant expressions.  */
 531
 532 inline hashval_t
 533 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 534 {
 535   return expr->hash;
 536 }
 537
 538 /* Hash table equality function for expressions.  */
 539
 540 inline bool
 541 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 542                            const iv_inv_expr_ent *expr2)
 543 {
 544   return expr1->hash == expr2->hash
 545          && operand_equal_p (expr1->expr, expr2->expr, 0);
 546 }
 547
 548 struct ivopts_data
 549 {
 550   /* The currently optimized loop.  */
 551   struct loop *current_loop;
 552   location_t loop_loc;
 553
 554   /* Numbers of iterations for all exits of the current loop.  */
 555   hash_map<edge, tree_niter_desc *> *niters;
 556
 557   /* Number of registers used in it.  */
 558   unsigned regs_used;
 559
 560   /* The size of version_info array allocated.  */
 561   unsigned version_info_size;
 562
 563   /* The array of information for the ssa names.  */
 564   struct version_info *version_info;
 565
 566   /* The hashtable of loop invariant expressions created
 567      by ivopt.  */
 568   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 569
 570   /* The bitmap of indices in version_info whose value was changed.  */
 571   bitmap relevant;
 572
 573   /* The uses of induction variables.  */
 574   vec<iv_group *> vgroups;
 575
 576   /* The candidates.  */
 577   vec<iv_cand *> vcands;
 578
 579   /* A bitmap of important candidates.  */
 580   bitmap important_candidates;
 581
 582   /* Cache used by tree_to_aff_combination_expand.  */
 583   hash_map<tree, name_expansion *> *name_expansion_cache;
 584
 585   /* The hashtable of common candidates derived from iv uses.  */
 586   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
 587
 588   /* The common candidates.  */
 589   vec<iv_common_cand *> iv_common_cands;
 590
 591   /* The maximum invariant variable id.  */
 592   unsigned max_inv_var_id;
 593
 594   /* The maximum invariant expression id.  */
 595   unsigned max_inv_expr_id;
 596
 597   /* Number of no_overflow BIVs which are not used in memory address.  */
 598   unsigned bivs_not_used_in_addr;
 599
 600   /* Obstack for iv structure.  */
 601   struct obstack iv_obstack;
 602
 603   /* Whether to consider just related and important candidates when replacing a
 604      use.  */
 605   bool consider_all_candidates;
 606
 607   /* Are we optimizing for speed?  */
 608   bool speed;
 609
 610   /* Whether the loop body includes any function calls.  */
 611   bool body_includes_call;
 612
 613   /* Whether the loop body can only be exited via single exit.  */
 614   bool loop_single_exit_p;
 615 };
 616
 617 /* An assignment of iv candidates to uses.  */
 618
 619 struct iv_ca
 620 {
 621   /* The number of uses covered by the assignment.  */
 622   unsigned upto;
 623
 624   /* Number of uses that cannot be expressed by the candidates in the set.  */
 625   unsigned bad_groups;
 626
 627   /* Candidate assigned to a use, together with the related costs.  */
 628   struct cost_pair **cand_for_group;
 629
 630   /* Number of times each candidate is used.  */
 631   unsigned *n_cand_uses;
 632
 633   /* The candidates used.  */
 634   bitmap cands;
 635
 636   /* The number of candidates in the set.  */
 637   unsigned n_cands;
 638
 639   /* The number of invariants needed, including both invariant variants and
 640      invariant expressions.  */
 641   unsigned n_invs;
 642
 643   /* Total cost of expressing uses.  */
 644   comp_cost cand_use_cost;
 645
 646   /* Total cost of candidates.  */
 647   int64_t cand_cost;
 648
 649   /* Number of times each invariant variable is used.  */
 650   unsigned *n_inv_var_uses;
 651
 652   /* Number of times each invariant expression is used.  */
 653   unsigned *n_inv_expr_uses;
 654
 655   /* Total cost of the assignment.  */
 656   comp_cost cost;
 657 };
 658
 659 /* Difference of two iv candidate assignments.  */
 660
 661 struct iv_ca_delta
 662 {
 663   /* Changed group.  */
 664   struct iv_group *group;
 665
 666   /* An old assignment (for rollback purposes).  */
 667   struct cost_pair *old_cp;
 668
 669   /* A new assignment.  */
 670   struct cost_pair *new_cp;
 671
 672   /* Next change in the list.  */
 673   struct iv_ca_delta *next;
 674 };
 675
 676 /* Bound on number of candidates below that all candidates are considered.  */
 677
 678 #define CONSIDER_ALL_CANDIDATES_BOUND \
 679   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 680
 681 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 682    optimizing such a loop would help, and it would take ages).  */
 683
 684 #define MAX_CONSIDERED_GROUPS \
 685   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 686
 687 /* If there are at most this number of ivs in the set, try removing unnecessary
 688    ivs from the set always.  */
 689
 690 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 691   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 692
 693 /* The list of trees for that the decl_rtl field must be reset is stored
 694    here.  */
 695
 696 static vec<tree> decl_rtl_to_reset;
 697
 698 static comp_cost force_expr_to_var_cost (tree, bool);
 699
 700 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 701
 702 edge
 703 single_dom_exit (struct loop *loop)
 704 {
 705   edge exit = single_exit (loop);
 706
 707   if (!exit)
 708     return NULL;
 709
 710   if (!just_once_each_iteration_p (loop, exit->src))
 711     return NULL;
 712
 713   return exit;
 714 }
 715
 716 /* Dumps information about the induction variable IV to FILE.  Don't dump
 717    variable's name if DUMP_NAME is FALSE.  The information is dumped with
 718    preceding spaces indicated by INDENT_LEVEL.  */
 719
 720 void
 721 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
 722 {
 723   const char *p;
 724   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
 725
 726   if (indent_level > 4)
 727     indent_level = 4;
 728   p = spaces + 8 - (indent_level << 1);
 729
 730   fprintf (file, "%sIV struct:\n", p);
 731   if (iv->ssa_name && dump_name)
 732     {
 733       fprintf (file, "%s  SSA_NAME:\t", p);
 734       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 735       fprintf (file, "\n");
 736     }
 737
 738   fprintf (file, "%s  Type:\t", p);
 739   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 740   fprintf (file, "\n");
 741
 742   fprintf (file, "%s  Base:\t", p);
 743   print_generic_expr (file, iv->base, TDF_SLIM);
 744   fprintf (file, "\n");
 745
 746   fprintf (file, "%s  Step:\t", p);
 747   print_generic_expr (file, iv->step, TDF_SLIM);
 748   fprintf (file, "\n");
 749
 750   if (iv->base_object)
 751     {
 752       fprintf (file, "%s  Object:\t", p);
 753       print_generic_expr (file, iv->base_object, TDF_SLIM);
 754       fprintf (file, "\n");
 755     }
 756
 757   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
 758
 759   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
 760            p, iv->no_overflow ? "No-overflow" : "Overflow");
 761 }
 762
 763 /* Dumps information about the USE to FILE.  */
 764
 765 void
 766 dump_use (FILE *file, struct iv_use *use)
 767 {
 768   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
 769   fprintf (file, "    At stmt:\t");
 770   print_gimple_stmt (file, use->stmt, 0);
 771   fprintf (file, "    At pos:\t");
 772   if (use->op_p)
 773     print_generic_expr (file, *use->op_p, TDF_SLIM);
 774   fprintf (file, "\n");
 775   dump_iv (file, use->iv, false, 2);
 776 }
 777
 778 /* Dumps information about the uses to FILE.  */
 779
 780 void
 781 dump_groups (FILE *file, struct ivopts_data *data)
 782 {
 783   unsigned i, j;
 784   struct iv_group *group;
 785
 786   for (i = 0; i < data->vgroups.length (); i++)
 787     {
 788       group = data->vgroups[i];
 789       fprintf (file, "Group %d:\n", group->id);
 790       if (group->type == USE_NONLINEAR_EXPR)
 791         fprintf (file, "  Type:\tGENERIC\n");
 792       else if (group->type == USE_REF_ADDRESS)
 793         fprintf (file, "  Type:\tREFERENCE ADDRESS\n");
 794       else if (group->type == USE_PTR_ADDRESS)
 795         fprintf (file, "  Type:\tPOINTER ARGUMENT ADDRESS\n");
 796       else
 797         {
 798           gcc_assert (group->type == USE_COMPARE);
 799           fprintf (file, "  Type:\tCOMPARE\n");
 800         }
 801       for (j = 0; j < group->vuses.length (); j++)
 802         dump_use (file, group->vuses[j]);
 803     }
 804 }
 805
 806 /* Dumps information about induction variable candidate CAND to FILE.  */
 807
 808 void
 809 dump_cand (FILE *file, struct iv_cand *cand)
 810 {
 811   struct iv *iv = cand->iv;
 812
 813   fprintf (file, "Candidate %d:\n", cand->id);
 814   if (cand->inv_vars)
 815     {
 816       fprintf (file, "  Depend on inv.vars: ");
 817       dump_bitmap (file, cand->inv_vars);
 818     }
 819   if (cand->inv_exprs)
 820     {
 821       fprintf (file, "  Depend on inv.exprs: ");
 822       dump_bitmap (file, cand->inv_exprs);
 823     }
 824
 825   if (cand->var_before)
 826     {
 827       fprintf (file, "  Var befor: ");
 828       print_generic_expr (file, cand->var_before, TDF_SLIM);
 829       fprintf (file, "\n");
 830     }
 831   if (cand->var_after)
 832     {
 833       fprintf (file, "  Var after: ");
 834       print_generic_expr (file, cand->var_after, TDF_SLIM);
 835       fprintf (file, "\n");
 836     }
 837
 838   switch (cand->pos)
 839     {
 840     case IP_NORMAL:
 841       fprintf (file, "  Incr POS: before exit test\n");
 842       break;
 843
 844     case IP_BEFORE_USE:
 845       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
 846       break;
 847
 848     case IP_AFTER_USE:
 849       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
 850       break;
 851
 852     case IP_END:
 853       fprintf (file, "  Incr POS: at end\n");
 854       break;
 855
 856     case IP_ORIGINAL:
 857       fprintf (file, "  Incr POS: orig biv\n");
 858       break;
 859     }
 860
 861   dump_iv (file, iv, false, 1);
 862 }
 863
 864 /* Returns the info for ssa version VER.  */
 865
 866 static inline struct version_info *
 867 ver_info (struct ivopts_data *data, unsigned ver)
 868 {
 869   return data->version_info + ver;
 870 }
 871
 872 /* Returns the info for ssa name NAME.  */
 873
 874 static inline struct version_info *
 875 name_info (struct ivopts_data *data, tree name)
 876 {
 877   return ver_info (data, SSA_NAME_VERSION (name));
 878 }
 879
 880 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 881    emitted in LOOP.  */
 882
 883 static bool
 884 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
 885 {
 886   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 887
 888   gcc_assert (bb);
 889
 890   if (sbb == loop->latch)
 891     return true;
 892
 893   if (sbb != bb)
 894     return false;
 895
 896   return stmt == last_stmt (bb);
 897 }
 898
 899 /* Returns true if STMT if after the place where the original induction
 900    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 901    if the positions are identical.  */
 902
 903 static bool
 904 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
 905 {
 906   basic_block cand_bb = gimple_bb (cand->incremented_at);
 907   basic_block stmt_bb = gimple_bb (stmt);
 908
 909   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 910     return false;
 911
 912   if (stmt_bb != cand_bb)
 913     return true;
 914
 915   if (true_if_equal
 916       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 917     return true;
 918   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 919 }
 920
 921 /* Returns true if STMT if after the place where the induction variable
 922    CAND is incremented in LOOP.  */
 923
 924 static bool
 925 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
 926 {
 927   switch (cand->pos)
 928     {
 929     case IP_END:
 930       return false;
 931
 932     case IP_NORMAL:
 933       return stmt_after_ip_normal_pos (loop, stmt);
 934
 935     case IP_ORIGINAL:
 936     case IP_AFTER_USE:
 937       return stmt_after_inc_pos (cand, stmt, false);
 938
 939     case IP_BEFORE_USE:
 940       return stmt_after_inc_pos (cand, stmt, true);
 941
 942     default:
 943       gcc_unreachable ();
 944     }
 945 }
 946
 947 /* walk_tree callback for contains_abnormal_ssa_name_p.  */
 948
 949 static tree
 950 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
 951 {
 952   if (TREE_CODE (*tp) == SSA_NAME
 953       && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
 954     return *tp;
 955
 956   if (!EXPR_P (*tp))
 957     *walk_subtrees = 0;
 958
 959   return NULL_TREE;
 960 }
 961
 962 /* Returns true if EXPR contains a ssa name that occurs in an
 963    abnormal phi node.  */
 964
 965 bool
 966 contains_abnormal_ssa_name_p (tree expr)
 967 {
 968   return walk_tree_without_duplicates
 969            (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
 970 }
 971
 972 /*  Returns the structure describing number of iterations determined from
 973     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 974
 975 static struct tree_niter_desc *
 976 niter_for_exit (struct ivopts_data *data, edge exit)
 977 {
 978   struct tree_niter_desc *desc;
 979   tree_niter_desc **slot;
 980
 981   if (!data->niters)
 982     {
 983       data->niters = new hash_map<edge, tree_niter_desc *>;
 984       slot = NULL;
 985     }
 986   else
 987     slot = data->niters->get (exit);
 988
 989   if (!slot)
 990     {
 991       /* Try to determine number of iterations.  We cannot safely work with ssa
 992          names that appear in phi nodes on abnormal edges, so that we do not
 993          create overlapping life ranges for them (PR 27283).  */
 994       desc = XNEW (struct tree_niter_desc);
 995       if (!number_of_iterations_exit (data->current_loop,
 996                                       exit, desc, true)
 997           || contains_abnormal_ssa_name_p (desc->niter))
 998         {
 999           XDELETE (desc);
1000           desc = NULL;
1001         }
1002       data->niters->put (exit, desc);
1003     }
1004   else
1005     desc = *slot;
1006
1007   return desc;
1008 }
1009
1010 /* Returns the structure describing number of iterations determined from
1011    single dominating exit of DATA->current_loop, or NULL if something
1012    goes wrong.  */
1013
1014 static struct tree_niter_desc *
1015 niter_for_single_dom_exit (struct ivopts_data *data)
1016 {
1017   edge exit = single_dom_exit (data->current_loop);
1018
1019   if (!exit)
1020     return NULL;
1021
1022   return niter_for_exit (data, exit);
1023 }
1024
1025 /* Initializes data structures used by the iv optimization pass, stored
1026    in DATA.  */
1027
1028 static void
1029 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1030 {
1031   data->version_info_size = 2 * num_ssa_names;
1032   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1033   data->relevant = BITMAP_ALLOC (NULL);
1034   data->important_candidates = BITMAP_ALLOC (NULL);
1035   data->max_inv_var_id = 0;
1036   data->max_inv_expr_id = 0;
1037   data->niters = NULL;
1038   data->vgroups.create (20);
1039   data->vcands.create (20);
1040   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1041   data->name_expansion_cache = NULL;
1042   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1043   data->iv_common_cands.create (20);
1044   decl_rtl_to_reset.create (20);
1045   gcc_obstack_init (&data->iv_obstack);
1046 }
1047
1048 /* Returns a memory object to that EXPR points.  In case we are able to
1049    determine that it does not point to any such object, NULL is returned.  */
1050
1051 static tree
1052 determine_base_object (tree expr)
1053 {
1054   enum tree_code code = TREE_CODE (expr);
1055   tree base, obj;
1056
1057   /* If this is a pointer casted to any type, we need to determine
1058      the base object for the pointer; so handle conversions before
1059      throwing away non-pointer expressions.  */
1060   if (CONVERT_EXPR_P (expr))
1061     return determine_base_object (TREE_OPERAND (expr, 0));
1062
1063   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1064     return NULL_TREE;
1065
1066   switch (code)
1067     {
1068     case INTEGER_CST:
1069       return NULL_TREE;
1070
1071     case ADDR_EXPR:
1072       obj = TREE_OPERAND (expr, 0);
1073       base = get_base_address (obj);
1074
1075       if (!base)
1076         return expr;
1077
1078       if (TREE_CODE (base) == MEM_REF)
1079         return determine_base_object (TREE_OPERAND (base, 0));
1080
1081       return fold_convert (ptr_type_node,
1082                            build_fold_addr_expr (base));
1083
1084     case POINTER_PLUS_EXPR:
1085       return determine_base_object (TREE_OPERAND (expr, 0));
1086
1087     case PLUS_EXPR:
1088     case MINUS_EXPR:
1089       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
1090       gcc_unreachable ();
1091
1092     default:
1093       if (POLY_INT_CST_P (expr))
1094         return NULL_TREE;
1095       return fold_convert (ptr_type_node, expr);
1096     }
1097 }
1098
1099 /* Return true if address expression with non-DECL_P operand appears
1100    in EXPR.  */
1101
1102 static bool
1103 contain_complex_addr_expr (tree expr)
1104 {
1105   bool res = false;
1106
1107   STRIP_NOPS (expr);
1108   switch (TREE_CODE (expr))
1109     {
1110     case POINTER_PLUS_EXPR:
1111     case PLUS_EXPR:
1112     case MINUS_EXPR:
1113       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1114       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1115       break;
1116
1117     case ADDR_EXPR:
1118       return (!DECL_P (TREE_OPERAND (expr, 0)));
1119
1120     default:
1121       return false;
1122     }
1123
1124   return res;
1125 }
1126
1127 /* Allocates an induction variable with given initial value BASE and step STEP
1128    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1129
1130 static struct iv *
1131 alloc_iv (struct ivopts_data *data, tree base, tree step,
1132           bool no_overflow = false)
1133 {
1134   tree expr = base;
1135   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1136                                               sizeof (struct iv));
1137   gcc_assert (step != NULL_TREE);
1138
1139   /* Lower address expression in base except ones with DECL_P as operand.
1140      By doing this:
1141        1) More accurate cost can be computed for address expressions;
1142        2) Duplicate candidates won't be created for bases in different
1143           forms, like &a[0] and &a.  */
1144   STRIP_NOPS (expr);
1145   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1146       || contain_complex_addr_expr (expr))
1147     {
1148       aff_tree comb;
1149       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1150       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1151     }
1152
1153   iv->base = base;
1154   iv->base_object = determine_base_object (base);
1155   iv->step = step;
1156   iv->biv_p = false;
1157   iv->nonlin_use = NULL;
1158   iv->ssa_name = NULL_TREE;
1159   if (!no_overflow
1160        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1161                               base, step))
1162     no_overflow = true;
1163   iv->no_overflow = no_overflow;
1164   iv->have_address_use = false;
1165
1166   return iv;
1167 }
1168
1169 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1170    doesn't overflow.  */
1171
1172 static void
1173 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1174         bool no_overflow)
1175 {
1176   struct version_info *info = name_info (data, iv);
1177
1178   gcc_assert (!info->iv);
1179
1180   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1181   info->iv = alloc_iv (data, base, step, no_overflow);
1182   info->iv->ssa_name = iv;
1183 }
1184
1185 /* Finds induction variable declaration for VAR.  */
1186
1187 static struct iv *
1188 get_iv (struct ivopts_data *data, tree var)
1189 {
1190   basic_block bb;
1191   tree type = TREE_TYPE (var);
1192
1193   if (!POINTER_TYPE_P (type)
1194       && !INTEGRAL_TYPE_P (type))
1195     return NULL;
1196
1197   if (!name_info (data, var)->iv)
1198     {
1199       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1200
1201       if (!bb
1202           || !flow_bb_inside_loop_p (data->current_loop, bb))
1203         set_iv (data, var, var, build_int_cst (type, 0), true);
1204     }
1205
1206   return name_info (data, var)->iv;
1207 }
1208
1209 /* Return the first non-invariant ssa var found in EXPR.  */
1210
1211 static tree
1212 extract_single_var_from_expr (tree expr)
1213 {
1214   int i, n;
1215   tree tmp;
1216   enum tree_code code;
1217
1218   if (!expr || is_gimple_min_invariant (expr))
1219     return NULL;
1220
1221   code = TREE_CODE (expr);
1222   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1223     {
1224       n = TREE_OPERAND_LENGTH (expr);
1225       for (i = 0; i < n; i++)
1226         {
1227           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1228
1229           if (tmp)
1230             return tmp;
1231         }
1232     }
1233   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1234 }
1235
1236 /* Finds basic ivs.  */
1237
1238 static bool
1239 find_bivs (struct ivopts_data *data)
1240 {
1241   gphi *phi;
1242   affine_iv iv;
1243   tree step, type, base, stop;
1244   bool found = false;
1245   struct loop *loop = data->current_loop;
1246   gphi_iterator psi;
1247
1248   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1249     {
1250       phi = psi.phi ();
1251
1252       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1253         continue;
1254
1255       if (virtual_operand_p (PHI_RESULT (phi)))
1256         continue;
1257
1258       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1259         continue;
1260
1261       if (integer_zerop (iv.step))
1262         continue;
1263
1264       step = iv.step;
1265       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1266       /* Stop expanding iv base at the first ssa var referred by iv step.
1267          Ideally we should stop at any ssa var, because that's expensive
1268          and unusual to happen, we just do it on the first one.
1269
1270          See PR64705 for the rationale.  */
1271       stop = extract_single_var_from_expr (step);
1272       base = expand_simple_operations (base, stop);
1273       if (contains_abnormal_ssa_name_p (base)
1274           || contains_abnormal_ssa_name_p (step))
1275         continue;
1276
1277       type = TREE_TYPE (PHI_RESULT (phi));
1278       base = fold_convert (type, base);
1279       if (step)
1280         {
1281           if (POINTER_TYPE_P (type))
1282             step = convert_to_ptrofftype (step);
1283           else
1284             step = fold_convert (type, step);
1285         }
1286
1287       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1288       found = true;
1289     }
1290
1291   return found;
1292 }
1293
1294 /* Marks basic ivs.  */
1295
1296 static void
1297 mark_bivs (struct ivopts_data *data)
1298 {
1299   gphi *phi;
1300   gimple *def;
1301   tree var;
1302   struct iv *iv, *incr_iv;
1303   struct loop *loop = data->current_loop;
1304   basic_block incr_bb;
1305   gphi_iterator psi;
1306
1307   data->bivs_not_used_in_addr = 0;
1308   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1309     {
1310       phi = psi.phi ();
1311
1312       iv = get_iv (data, PHI_RESULT (phi));
1313       if (!iv)
1314         continue;
1315
1316       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1317       def = SSA_NAME_DEF_STMT (var);
1318       /* Don't mark iv peeled from other one as biv.  */
1319       if (def
1320           && gimple_code (def) == GIMPLE_PHI
1321           && gimple_bb (def) == loop->header)
1322         continue;
1323
1324       incr_iv = get_iv (data, var);
1325       if (!incr_iv)
1326         continue;
1327
1328       /* If the increment is in the subloop, ignore it.  */
1329       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1330       if (incr_bb->loop_father != data->current_loop
1331           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1332         continue;
1333
1334       iv->biv_p = true;
1335       incr_iv->biv_p = true;
1336       if (iv->no_overflow)
1337         data->bivs_not_used_in_addr++;
1338       if (incr_iv->no_overflow)
1339         data->bivs_not_used_in_addr++;
1340     }
1341 }
1342
1343 /* Checks whether STMT defines a linear induction variable and stores its
1344    parameters to IV.  */
1345
1346 static bool
1347 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1348 {
1349   tree lhs, stop;
1350   struct loop *loop = data->current_loop;
1351
1352   iv->base = NULL_TREE;
1353   iv->step = NULL_TREE;
1354
1355   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1356     return false;
1357
1358   lhs = gimple_assign_lhs (stmt);
1359   if (TREE_CODE (lhs) != SSA_NAME)
1360     return false;
1361
1362   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1363     return false;
1364
1365   /* Stop expanding iv base at the first ssa var referred by iv step.
1366      Ideally we should stop at any ssa var, because that's expensive
1367      and unusual to happen, we just do it on the first one.
1368
1369      See PR64705 for the rationale.  */
1370   stop = extract_single_var_from_expr (iv->step);
1371   iv->base = expand_simple_operations (iv->base, stop);
1372   if (contains_abnormal_ssa_name_p (iv->base)
1373       || contains_abnormal_ssa_name_p (iv->step))
1374     return false;
1375
1376   /* If STMT could throw, then do not consider STMT as defining a GIV.
1377      While this will suppress optimizations, we cannot safely delete this
1378      GIV and associated statements, even if it appears it is not used.  */
1379   if (stmt_could_throw_p (cfun, stmt))
1380     return false;
1381
1382   return true;
1383 }
1384
1385 /* Finds general ivs in statement STMT.  */
1386
1387 static void
1388 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1389 {
1390   affine_iv iv;
1391
1392   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1393     return;
1394
1395   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1396 }
1397
1398 /* Finds general ivs in basic block BB.  */
1399
1400 static void
1401 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1402 {
1403   gimple_stmt_iterator bsi;
1404
1405   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1406     find_givs_in_stmt (data, gsi_stmt (bsi));
1407 }
1408
1409 /* Finds general ivs.  */
1410
1411 static void
1412 find_givs (struct ivopts_data *data)
1413 {
1414   struct loop *loop = data->current_loop;
1415   basic_block *body = get_loop_body_in_dom_order (loop);
1416   unsigned i;
1417
1418   for (i = 0; i < loop->num_nodes; i++)
1419     find_givs_in_bb (data, body[i]);
1420   free (body);
1421 }
1422
1423 /* For each ssa name defined in LOOP determines whether it is an induction
1424    variable and if so, its initial value and step.  */
1425
1426 static bool
1427 find_induction_variables (struct ivopts_data *data)
1428 {
1429   unsigned i;
1430   bitmap_iterator bi;
1431
1432   if (!find_bivs (data))
1433     return false;
1434
1435   find_givs (data);
1436   mark_bivs (data);
1437
1438   if (dump_file && (dump_flags & TDF_DETAILS))
1439     {
1440       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1441
1442       if (niter)
1443         {
1444           fprintf (dump_file, "  number of iterations ");
1445           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1446           if (!integer_zerop (niter->may_be_zero))
1447             {
1448               fprintf (dump_file, "; zero if ");
1449               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1450             }
1451           fprintf (dump_file, "\n");
1452         };
1453
1454       fprintf (dump_file, "\n<Induction Vars>:\n");
1455       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1456         {
1457           struct version_info *info = ver_info (data, i);
1458           if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1459             dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1460         }
1461     }
1462
1463   return true;
1464 }
1465
1466 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1467    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1468    is the const offset stripped from IV base and MEM_TYPE is the type
1469    of the memory being addressed.  For uses of other types, ADDR_BASE
1470    and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE.  */
1471
1472 static struct iv_use *
1473 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1474             gimple *stmt, enum use_type type, tree mem_type,
1475             tree addr_base, poly_uint64 addr_offset)
1476 {
1477   struct iv_use *use = XCNEW (struct iv_use);
1478
1479   use->id = group->vuses.length ();
1480   use->group_id = group->id;
1481   use->type = type;
1482   use->mem_type = mem_type;
1483   use->iv = iv;
1484   use->stmt = stmt;
1485   use->op_p = use_p;
1486   use->addr_base = addr_base;
1487   use->addr_offset = addr_offset;
1488
1489   group->vuses.safe_push (use);
1490   return use;
1491 }
1492
1493 /* Checks whether OP is a loop-level invariant and if so, records it.
1494    NONLINEAR_USE is true if the invariant is used in a way we do not
1495    handle specially.  */
1496
1497 static void
1498 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1499 {
1500   basic_block bb;
1501   struct version_info *info;
1502
1503   if (TREE_CODE (op) != SSA_NAME
1504       || virtual_operand_p (op))
1505     return;
1506
1507   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1508   if (bb
1509       && flow_bb_inside_loop_p (data->current_loop, bb))
1510     return;
1511
1512   info = name_info (data, op);
1513   info->name = op;
1514   info->has_nonlin_use |= nonlinear_use;
1515   if (!info->inv_id)
1516     info->inv_id = ++data->max_inv_var_id;
1517   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1518 }
1519
1520 /* Record a group of TYPE.  */
1521
1522 static struct iv_group *
1523 record_group (struct ivopts_data *data, enum use_type type)
1524 {
1525   struct iv_group *group = XCNEW (struct iv_group);
1526
1527   group->id = data->vgroups.length ();
1528   group->type = type;
1529   group->related_cands = BITMAP_ALLOC (NULL);
1530   group->vuses.create (1);
1531
1532   data->vgroups.safe_push (group);
1533   return group;
1534 }
1535
1536 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1537    New group will be created if there is no existing group for the use.
1538    MEM_TYPE is the type of memory being addressed, or NULL if this
1539    isn't an address reference.  */
1540
1541 static struct iv_use *
1542 record_group_use (struct ivopts_data *data, tree *use_p,
1543                   struct iv *iv, gimple *stmt, enum use_type type,
1544                   tree mem_type)
1545 {
1546   tree addr_base = NULL;
1547   struct iv_group *group = NULL;
1548   poly_uint64 addr_offset = 0;
1549
1550   /* Record non address type use in a new group.  */
1551   if (address_p (type))
1552     {
1553       unsigned int i;
1554
1555       addr_base = strip_offset (iv->base, &addr_offset);
1556       for (i = 0; i < data->vgroups.length (); i++)
1557         {
1558           struct iv_use *use;
1559
1560           group = data->vgroups[i];
1561           use = group->vuses[0];
1562           if (!address_p (use->type))
1563             continue;
1564
1565           /* Check if it has the same stripped base and step.  */
1566           if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1567               && operand_equal_p (iv->step, use->iv->step, 0)
1568               && operand_equal_p (addr_base, use->addr_base, 0))
1569             break;
1570         }
1571       if (i == data->vgroups.length ())
1572         group = NULL;
1573     }
1574
1575   if (!group)
1576     group = record_group (data, type);
1577
1578   return record_use (group, use_p, iv, stmt, type, mem_type,
1579                      addr_base, addr_offset);
1580 }
1581
1582 /* Checks whether the use OP is interesting and if so, records it.  */
1583
1584 static struct iv_use *
1585 find_interesting_uses_op (struct ivopts_data *data, tree op)
1586 {
1587   struct iv *iv;
1588   gimple *stmt;
1589   struct iv_use *use;
1590
1591   if (TREE_CODE (op) != SSA_NAME)
1592     return NULL;
1593
1594   iv = get_iv (data, op);
1595   if (!iv)
1596     return NULL;
1597
1598   if (iv->nonlin_use)
1599     {
1600       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1601       return iv->nonlin_use;
1602     }
1603
1604   if (integer_zerop (iv->step))
1605     {
1606       record_invariant (data, op, true);
1607       return NULL;
1608     }
1609
1610   stmt = SSA_NAME_DEF_STMT (op);
1611   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1612
1613   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1614   iv->nonlin_use = use;
1615   return use;
1616 }
1617
1618 /* Indicate how compare type iv_use can be handled.  */
1619 enum comp_iv_rewrite
1620 {
1621   COMP_IV_NA,
1622   /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
1623   COMP_IV_EXPR,
1624   /* We may rewrite compare type iv_uses on both sides of comparison by
1625      expressing value of each iv_use.  */
1626   COMP_IV_EXPR_2,
1627   /* We may rewrite compare type iv_use by expressing value of the iv_use
1628      or by eliminating it with other iv_cand.  */
1629   COMP_IV_ELIM
1630 };
1631
1632 /* Given a condition in statement STMT, checks whether it is a compare
1633    of an induction variable and an invariant.  If this is the case,
1634    CONTROL_VAR is set to location of the iv, BOUND to the location of
1635    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1636    induction variable descriptions, and true is returned.  If this is not
1637    the case, CONTROL_VAR and BOUND are set to the arguments of the
1638    condition and false is returned.  */
1639
1640 static enum comp_iv_rewrite
1641 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1642                        tree **control_var, tree **bound,
1643                        struct iv **iv_var, struct iv **iv_bound)
1644 {
1645   /* The objects returned when COND has constant operands.  */
1646   static struct iv const_iv;
1647   static tree zero;
1648   tree *op0 = &zero, *op1 = &zero;
1649   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1650   enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1651
1652   if (gimple_code (stmt) == GIMPLE_COND)
1653     {
1654       gcond *cond_stmt = as_a <gcond *> (stmt);
1655       op0 = gimple_cond_lhs_ptr (cond_stmt);
1656       op1 = gimple_cond_rhs_ptr (cond_stmt);
1657     }
1658   else
1659     {
1660       op0 = gimple_assign_rhs1_ptr (stmt);
1661       op1 = gimple_assign_rhs2_ptr (stmt);
1662     }
1663
1664   zero = integer_zero_node;
1665   const_iv.step = integer_zero_node;
1666
1667   if (TREE_CODE (*op0) == SSA_NAME)
1668     iv0 = get_iv (data, *op0);
1669   if (TREE_CODE (*op1) == SSA_NAME)
1670     iv1 = get_iv (data, *op1);
1671
1672   /* If both sides of comparison are IVs.  We can express ivs on both end.  */
1673   if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1674     {
1675       rewrite_type = COMP_IV_EXPR_2;
1676       goto end;
1677     }
1678
1679   /* If none side of comparison is IV.  */
1680   if ((!iv0 || integer_zerop (iv0->step))
1681       && (!iv1 || integer_zerop (iv1->step)))
1682     goto end;
1683
1684   /* Control variable may be on the other side.  */
1685   if (!iv0 || integer_zerop (iv0->step))
1686     {
1687       std::swap (op0, op1);
1688       std::swap (iv0, iv1);
1689     }
1690   /* If one side is IV and the other side isn't loop invariant.  */
1691   if (!iv1)
1692     rewrite_type = COMP_IV_EXPR;
1693   /* If one side is IV and the other side is loop invariant.  */
1694   else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1695     rewrite_type = COMP_IV_ELIM;
1696
1697 end:
1698   if (control_var)
1699     *control_var = op0;
1700   if (iv_var)
1701     *iv_var = iv0;
1702   if (bound)
1703     *bound = op1;
1704   if (iv_bound)
1705     *iv_bound = iv1;
1706
1707   return rewrite_type;
1708 }
1709
1710 /* Checks whether the condition in STMT is interesting and if so,
1711    records it.  */
1712
1713 static void
1714 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1715 {
1716   tree *var_p, *bound_p;
1717   struct iv *var_iv, *bound_iv;
1718   enum comp_iv_rewrite ret;
1719
1720   ret = extract_cond_operands (data, stmt,
1721                                &var_p, &bound_p, &var_iv, &bound_iv);
1722   if (ret == COMP_IV_NA)
1723     {
1724       find_interesting_uses_op (data, *var_p);
1725       find_interesting_uses_op (data, *bound_p);
1726       return;
1727     }
1728
1729   record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1730   /* Record compare type iv_use for iv on the other side of comparison.  */
1731   if (ret == COMP_IV_EXPR_2)
1732     record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1733 }
1734
1735 /* Returns the outermost loop EXPR is obviously invariant in
1736    relative to the loop LOOP, i.e. if all its operands are defined
1737    outside of the returned loop.  Returns NULL if EXPR is not
1738    even obviously invariant in LOOP.  */
1739
1740 struct loop *
1741 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1742 {
1743   basic_block def_bb;
1744   unsigned i, len;
1745
1746   if (is_gimple_min_invariant (expr))
1747     return current_loops->tree_root;
1748
1749   if (TREE_CODE (expr) == SSA_NAME)
1750     {
1751       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1752       if (def_bb)
1753         {
1754           if (flow_bb_inside_loop_p (loop, def_bb))
1755             return NULL;
1756           return superloop_at_depth (loop,
1757                                      loop_depth (def_bb->loop_father) + 1);
1758         }
1759
1760       return current_loops->tree_root;
1761     }
1762
1763   if (!EXPR_P (expr))
1764     return NULL;
1765
1766   unsigned maxdepth = 0;
1767   len = TREE_OPERAND_LENGTH (expr);
1768   for (i = 0; i < len; i++)
1769     {
1770       struct loop *ivloop;
1771       if (!TREE_OPERAND (expr, i))
1772         continue;
1773
1774       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1775       if (!ivloop)
1776         return NULL;
1777       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1778     }
1779
1780   return superloop_at_depth (loop, maxdepth);
1781 }
1782
1783 /* Returns true if expression EXPR is obviously invariant in LOOP,
1784    i.e. if all its operands are defined outside of the LOOP.  LOOP
1785    should not be the function body.  */
1786
1787 bool
1788 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1789 {
1790   basic_block def_bb;
1791   unsigned i, len;
1792
1793   gcc_assert (loop_depth (loop) > 0);
1794
1795   if (is_gimple_min_invariant (expr))
1796     return true;
1797
1798   if (TREE_CODE (expr) == SSA_NAME)
1799     {
1800       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1801       if (def_bb
1802           && flow_bb_inside_loop_p (loop, def_bb))
1803         return false;
1804
1805       return true;
1806     }
1807
1808   if (!EXPR_P (expr))
1809     return false;
1810
1811   len = TREE_OPERAND_LENGTH (expr);
1812   for (i = 0; i < len; i++)
1813     if (TREE_OPERAND (expr, i)
1814         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1815       return false;
1816
1817   return true;
1818 }
1819
1820 /* Given expression EXPR which computes inductive values with respect
1821    to loop recorded in DATA, this function returns biv from which EXPR
1822    is derived by tracing definition chains of ssa variables in EXPR.  */
1823
1824 static struct iv*
1825 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1826 {
1827   struct iv *iv;
1828   unsigned i, n;
1829   tree e2, e1;
1830   enum tree_code code;
1831   gimple *stmt;
1832
1833   if (expr == NULL_TREE)
1834     return NULL;
1835
1836   if (is_gimple_min_invariant (expr))
1837     return NULL;
1838
1839   code = TREE_CODE (expr);
1840   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1841     {
1842       n = TREE_OPERAND_LENGTH (expr);
1843       for (i = 0; i < n; i++)
1844         {
1845           iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1846           if (iv)
1847             return iv;
1848         }
1849     }
1850
1851   /* Stop if it's not ssa name.  */
1852   if (code != SSA_NAME)
1853     return NULL;
1854
1855   iv = get_iv (data, expr);
1856   if (!iv || integer_zerop (iv->step))
1857     return NULL;
1858   else if (iv->biv_p)
1859     return iv;
1860
1861   stmt = SSA_NAME_DEF_STMT (expr);
1862   if (gphi *phi = dyn_cast <gphi *> (stmt))
1863     {
1864       ssa_op_iter iter;
1865       use_operand_p use_p;
1866       basic_block phi_bb = gimple_bb (phi);
1867
1868       /* Skip loop header PHI that doesn't define biv.  */
1869       if (phi_bb->loop_father == data->current_loop)
1870         return NULL;
1871
1872       if (virtual_operand_p (gimple_phi_result (phi)))
1873         return NULL;
1874
1875       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1876         {
1877           tree use = USE_FROM_PTR (use_p);
1878           iv = find_deriving_biv_for_expr (data, use);
1879           if (iv)
1880             return iv;
1881         }
1882       return NULL;
1883     }
1884   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1885     return NULL;
1886
1887   e1 = gimple_assign_rhs1 (stmt);
1888   code = gimple_assign_rhs_code (stmt);
1889   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1890     return find_deriving_biv_for_expr (data, e1);
1891
1892   switch (code)
1893     {
1894     case MULT_EXPR:
1895     case PLUS_EXPR:
1896     case MINUS_EXPR:
1897     case POINTER_PLUS_EXPR:
1898       /* Increments, decrements and multiplications by a constant
1899          are simple.  */
1900       e2 = gimple_assign_rhs2 (stmt);
1901       iv = find_deriving_biv_for_expr (data, e2);
1902       if (iv)
1903         return iv;
1904       gcc_fallthrough ();
1905
1906     CASE_CONVERT:
1907       /* Casts are simple.  */
1908       return find_deriving_biv_for_expr (data, e1);
1909
1910     default:
1911       break;
1912     }
1913
1914   return NULL;
1915 }
1916
1917 /* Record BIV, its predecessor and successor that they are used in
1918    address type uses.  */
1919
1920 static void
1921 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1922 {
1923   unsigned i;
1924   tree type, base_1, base_2;
1925   bitmap_iterator bi;
1926
1927   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1928       || biv->have_address_use || !biv->no_overflow)
1929     return;
1930
1931   type = TREE_TYPE (biv->base);
1932   if (!INTEGRAL_TYPE_P (type))
1933     return;
1934
1935   biv->have_address_use = true;
1936   data->bivs_not_used_in_addr--;
1937   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1938   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1939     {
1940       struct iv *iv = ver_info (data, i)->iv;
1941
1942       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1943           || iv->have_address_use || !iv->no_overflow)
1944         continue;
1945
1946       if (type != TREE_TYPE (iv->base)
1947           || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1948         continue;
1949
1950       if (!operand_equal_p (biv->step, iv->step, 0))
1951         continue;
1952
1953       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1954       if (operand_equal_p (base_1, iv->base, 0)
1955           || operand_equal_p (base_2, biv->base, 0))
1956         {
1957           iv->have_address_use = true;
1958           data->bivs_not_used_in_addr--;
1959         }
1960     }
1961 }
1962
1963 /* Cumulates the steps of indices into DATA and replaces their values with the
1964    initial ones.  Returns false when the value of the index cannot be determined.
1965    Callback for for_each_index.  */
1966
1967 struct ifs_ivopts_data
1968 {
1969   struct ivopts_data *ivopts_data;
1970   gimple *stmt;
1971   tree step;
1972 };
1973
1974 static bool
1975 idx_find_step (tree base, tree *idx, void *data)
1976 {
1977   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1978   struct iv *iv;
1979   bool use_overflow_semantics = false;
1980   tree step, iv_base, iv_step, lbound, off;
1981   struct loop *loop = dta->ivopts_data->current_loop;
1982
1983   /* If base is a component ref, require that the offset of the reference
1984      be invariant.  */
1985   if (TREE_CODE (base) == COMPONENT_REF)
1986     {
1987       off = component_ref_field_offset (base);
1988       return expr_invariant_in_loop_p (loop, off);
1989     }
1990
1991   /* If base is array, first check whether we will be able to move the
1992      reference out of the loop (in order to take its address in strength
1993      reduction).  In order for this to work we need both lower bound
1994      and step to be loop invariants.  */
1995   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1996     {
1997       /* Moreover, for a range, the size needs to be invariant as well.  */
1998       if (TREE_CODE (base) == ARRAY_RANGE_REF
1999           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2000         return false;
2001
2002       step = array_ref_element_size (base);
2003       lbound = array_ref_low_bound (base);
2004
2005       if (!expr_invariant_in_loop_p (loop, step)
2006           || !expr_invariant_in_loop_p (loop, lbound))
2007         return false;
2008     }
2009
2010   if (TREE_CODE (*idx) != SSA_NAME)
2011     return true;
2012
2013   iv = get_iv (dta->ivopts_data, *idx);
2014   if (!iv)
2015     return false;
2016
2017   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2018           *&x[0], which is not folded and does not trigger the
2019           ARRAY_REF path below.  */
2020   *idx = iv->base;
2021
2022   if (integer_zerop (iv->step))
2023     return true;
2024
2025   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2026     {
2027       step = array_ref_element_size (base);
2028
2029       /* We only handle addresses whose step is an integer constant.  */
2030       if (TREE_CODE (step) != INTEGER_CST)
2031         return false;
2032     }
2033   else
2034     /* The step for pointer arithmetics already is 1 byte.  */
2035     step = size_one_node;
2036
2037   iv_base = iv->base;
2038   iv_step = iv->step;
2039   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2040     use_overflow_semantics = true;
2041
2042   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2043                             sizetype, &iv_base, &iv_step, dta->stmt,
2044                             use_overflow_semantics))
2045     {
2046       /* The index might wrap.  */
2047       return false;
2048     }
2049
2050   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2051   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2052
2053   if (dta->ivopts_data->bivs_not_used_in_addr)
2054     {
2055       if (!iv->biv_p)
2056         iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2057
2058       record_biv_for_address_use (dta->ivopts_data, iv);
2059     }
2060   return true;
2061 }
2062
2063 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2064    object is passed to it in DATA.  */
2065
2066 static bool
2067 idx_record_use (tree base, tree *idx,
2068                 void *vdata)
2069 {
2070   struct ivopts_data *data = (struct ivopts_data *) vdata;
2071   find_interesting_uses_op (data, *idx);
2072   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2073     {
2074       find_interesting_uses_op (data, array_ref_element_size (base));
2075       find_interesting_uses_op (data, array_ref_low_bound (base));
2076     }
2077   return true;
2078 }
2079
2080 /* If we can prove that TOP = cst * BOT for some constant cst,
2081    store cst to MUL and return true.  Otherwise return false.
2082    The returned value is always sign-extended, regardless of the
2083    signedness of TOP and BOT.  */
2084
2085 static bool
2086 constant_multiple_of (tree top, tree bot, widest_int *mul)
2087 {
2088   tree mby;
2089   enum tree_code code;
2090   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2091   widest_int res, p0, p1;
2092
2093   STRIP_NOPS (top);
2094   STRIP_NOPS (bot);
2095
2096   if (operand_equal_p (top, bot, 0))
2097     {
2098       *mul = 1;
2099       return true;
2100     }
2101
2102   code = TREE_CODE (top);
2103   switch (code)
2104     {
2105     case MULT_EXPR:
2106       mby = TREE_OPERAND (top, 1);
2107       if (TREE_CODE (mby) != INTEGER_CST)
2108         return false;
2109
2110       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2111         return false;
2112
2113       *mul = wi::sext (res * wi::to_widest (mby), precision);
2114       return true;
2115
2116     case PLUS_EXPR:
2117     case MINUS_EXPR:
2118       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2119           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2120         return false;
2121
2122       if (code == MINUS_EXPR)
2123         p1 = -p1;
2124       *mul = wi::sext (p0 + p1, precision);
2125       return true;
2126
2127     case INTEGER_CST:
2128       if (TREE_CODE (bot) != INTEGER_CST)
2129         return false;
2130
2131       p0 = widest_int::from (wi::to_wide (top), SIGNED);
2132       p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2133       if (p1 == 0)
2134         return false;
2135       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2136       return res == 0;
2137
2138     default:
2139       if (POLY_INT_CST_P (top)
2140           && POLY_INT_CST_P (bot)
2141           && constant_multiple_p (wi::to_poly_widest (top),
2142                                   wi::to_poly_widest (bot), mul))
2143         return true;
2144
2145       return false;
2146     }
2147 }
2148
2149 /* Return true if memory reference REF with step STEP may be unaligned.  */
2150
2151 static bool
2152 may_be_unaligned_p (tree ref, tree step)
2153 {
2154   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2155      thus they are not misaligned.  */
2156   if (TREE_CODE (ref) == TARGET_MEM_REF)
2157     return false;
2158
2159   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2160   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2161     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2162
2163   unsigned HOST_WIDE_INT bitpos;
2164   unsigned int ref_align;
2165   get_object_alignment_1 (ref, &ref_align, &bitpos);
2166   if (ref_align < align
2167       || (bitpos % align) != 0
2168       || (bitpos % BITS_PER_UNIT) != 0)
2169     return true;
2170
2171   unsigned int trailing_zeros = tree_ctz (step);
2172   if (trailing_zeros < HOST_BITS_PER_INT
2173       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2174     return true;
2175
2176   return false;
2177 }
2178
2179 /* Return true if EXPR may be non-addressable.   */
2180
2181 bool
2182 may_be_nonaddressable_p (tree expr)
2183 {
2184   switch (TREE_CODE (expr))
2185     {
2186     case VAR_DECL:
2187       /* Check if it's a register variable.  */
2188       return DECL_HARD_REGISTER (expr);
2189
2190     case TARGET_MEM_REF:
2191       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2192          target, thus they are always addressable.  */
2193       return false;
2194
2195     case MEM_REF:
2196       /* Likewise for MEM_REFs, modulo the storage order.  */
2197       return REF_REVERSE_STORAGE_ORDER (expr);
2198
2199     case BIT_FIELD_REF:
2200       if (REF_REVERSE_STORAGE_ORDER (expr))
2201         return true;
2202       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2203
2204     case COMPONENT_REF:
2205       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2206         return true;
2207       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2208              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2209
2210     case ARRAY_REF:
2211     case ARRAY_RANGE_REF:
2212       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2213         return true;
2214       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2215
2216     case VIEW_CONVERT_EXPR:
2217       /* This kind of view-conversions may wrap non-addressable objects
2218          and make them look addressable.  After some processing the
2219          non-addressability may be uncovered again, causing ADDR_EXPRs
2220          of inappropriate objects to be built.  */
2221       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2222           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2223         return true;
2224       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2225
2226     CASE_CONVERT:
2227       return true;
2228
2229     default:
2230       break;
2231     }
2232
2233   return false;
2234 }
2235
2236 /* Finds addresses in *OP_P inside STMT.  */
2237
2238 static void
2239 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2240                                tree *op_p)
2241 {
2242   tree base = *op_p, step = size_zero_node;
2243   struct iv *civ;
2244   struct ifs_ivopts_data ifs_ivopts_data;
2245
2246   /* Do not play with volatile memory references.  A bit too conservative,
2247      perhaps, but safe.  */
2248   if (gimple_has_volatile_ops (stmt))
2249     goto fail;
2250
2251   /* Ignore bitfields for now.  Not really something terribly complicated
2252      to handle.  TODO.  */
2253   if (TREE_CODE (base) == BIT_FIELD_REF)
2254     goto fail;
2255
2256   base = unshare_expr (base);
2257
2258   if (TREE_CODE (base) == TARGET_MEM_REF)
2259     {
2260       tree type = build_pointer_type (TREE_TYPE (base));
2261       tree astep;
2262
2263       if (TMR_BASE (base)
2264           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2265         {
2266           civ = get_iv (data, TMR_BASE (base));
2267           if (!civ)
2268             goto fail;
2269
2270           TMR_BASE (base) = civ->base;
2271           step = civ->step;
2272         }
2273       if (TMR_INDEX2 (base)
2274           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2275         {
2276           civ = get_iv (data, TMR_INDEX2 (base));
2277           if (!civ)
2278             goto fail;
2279
2280           TMR_INDEX2 (base) = civ->base;
2281           step = civ->step;
2282         }
2283       if (TMR_INDEX (base)
2284           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2285         {
2286           civ = get_iv (data, TMR_INDEX (base));
2287           if (!civ)
2288             goto fail;
2289
2290           TMR_INDEX (base) = civ->base;
2291           astep = civ->step;
2292
2293           if (astep)
2294             {
2295               if (TMR_STEP (base))
2296                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2297
2298               step = fold_build2 (PLUS_EXPR, type, step, astep);
2299             }
2300         }
2301
2302       if (integer_zerop (step))
2303         goto fail;
2304       base = tree_mem_ref_addr (type, base);
2305     }
2306   else
2307     {
2308       ifs_ivopts_data.ivopts_data = data;
2309       ifs_ivopts_data.stmt = stmt;
2310       ifs_ivopts_data.step = size_zero_node;
2311       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2312           || integer_zerop (ifs_ivopts_data.step))
2313         goto fail;
2314       step = ifs_ivopts_data.step;
2315
2316       /* Check that the base expression is addressable.  This needs
2317          to be done after substituting bases of IVs into it.  */
2318       if (may_be_nonaddressable_p (base))
2319         goto fail;
2320
2321       /* Moreover, on strict alignment platforms, check that it is
2322          sufficiently aligned.  */
2323       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2324         goto fail;
2325
2326       base = build_fold_addr_expr (base);
2327
2328       /* Substituting bases of IVs into the base expression might
2329          have caused folding opportunities.  */
2330       if (TREE_CODE (base) == ADDR_EXPR)
2331         {
2332           tree *ref = &TREE_OPERAND (base, 0);
2333           while (handled_component_p (*ref))
2334             ref = &TREE_OPERAND (*ref, 0);
2335           if (TREE_CODE (*ref) == MEM_REF)
2336             {
2337               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2338                                       TREE_OPERAND (*ref, 0),
2339                                       TREE_OPERAND (*ref, 1));
2340               if (tem)
2341                 *ref = tem;
2342             }
2343         }
2344     }
2345
2346   civ = alloc_iv (data, base, step);
2347   /* Fail if base object of this memory reference is unknown.  */
2348   if (civ->base_object == NULL_TREE)
2349     goto fail;
2350
2351   record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2352   return;
2353
2354 fail:
2355   for_each_index (op_p, idx_record_use, data);
2356 }
2357
2358 /* Finds and records invariants used in STMT.  */
2359
2360 static void
2361 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2362 {
2363   ssa_op_iter iter;
2364   use_operand_p use_p;
2365   tree op;
2366
2367   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2368     {
2369       op = USE_FROM_PTR (use_p);
2370       record_invariant (data, op, false);
2371     }
2372 }
2373
2374 /* CALL calls an internal function.  If operand *OP_P will become an
2375    address when the call is expanded, return the type of the memory
2376    being addressed, otherwise return null.  */
2377
2378 static tree
2379 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2380 {
2381   switch (gimple_call_internal_fn (call))
2382     {
2383     case IFN_MASK_LOAD:
2384     case IFN_MASK_LOAD_LANES:
2385       if (op_p == gimple_call_arg_ptr (call, 0))
2386         return TREE_TYPE (gimple_call_lhs (call));
2387       return NULL_TREE;
2388
2389     case IFN_MASK_STORE:
2390     case IFN_MASK_STORE_LANES:
2391       if (op_p == gimple_call_arg_ptr (call, 0))
2392         return TREE_TYPE (gimple_call_arg (call, 3));
2393       return NULL_TREE;
2394
2395     default:
2396       return NULL_TREE;
2397     }
2398 }
2399
2400 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2401    Return true if the operand will become an address when STMT
2402    is expanded and record the associated address use if so.  */
2403
2404 static bool
2405 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2406                        struct iv *iv)
2407 {
2408   /* Fail if base object of this memory reference is unknown.  */
2409   if (iv->base_object == NULL_TREE)
2410     return false;
2411
2412   tree mem_type = NULL_TREE;
2413   if (gcall *call = dyn_cast <gcall *> (stmt))
2414     if (gimple_call_internal_p (call))
2415       mem_type = get_mem_type_for_internal_fn (call, op_p);
2416   if (mem_type)
2417     {
2418       iv = alloc_iv (data, iv->base, iv->step);
2419       record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2420       return true;
2421     }
2422   return false;
2423 }
2424
2425 /* Finds interesting uses of induction variables in the statement STMT.  */
2426
2427 static void
2428 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2429 {
2430   struct iv *iv;
2431   tree op, *lhs, *rhs;
2432   ssa_op_iter iter;
2433   use_operand_p use_p;
2434   enum tree_code code;
2435
2436   find_invariants_stmt (data, stmt);
2437
2438   if (gimple_code (stmt) == GIMPLE_COND)
2439     {
2440       find_interesting_uses_cond (data, stmt);
2441       return;
2442     }
2443
2444   if (is_gimple_assign (stmt))
2445     {
2446       lhs = gimple_assign_lhs_ptr (stmt);
2447       rhs = gimple_assign_rhs1_ptr (stmt);
2448
2449       if (TREE_CODE (*lhs) == SSA_NAME)
2450         {
2451           /* If the statement defines an induction variable, the uses are not
2452              interesting by themselves.  */
2453
2454           iv = get_iv (data, *lhs);
2455
2456           if (iv && !integer_zerop (iv->step))
2457             return;
2458         }
2459
2460       code = gimple_assign_rhs_code (stmt);
2461       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2462           && (REFERENCE_CLASS_P (*rhs)
2463               || is_gimple_val (*rhs)))
2464         {
2465           if (REFERENCE_CLASS_P (*rhs))
2466             find_interesting_uses_address (data, stmt, rhs);
2467           else
2468             find_interesting_uses_op (data, *rhs);
2469
2470           if (REFERENCE_CLASS_P (*lhs))
2471             find_interesting_uses_address (data, stmt, lhs);
2472           return;
2473         }
2474       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2475         {
2476           find_interesting_uses_cond (data, stmt);
2477           return;
2478         }
2479
2480       /* TODO -- we should also handle address uses of type
2481
2482          memory = call (whatever);
2483
2484          and
2485
2486          call (memory).  */
2487     }
2488
2489   if (gimple_code (stmt) == GIMPLE_PHI
2490       && gimple_bb (stmt) == data->current_loop->header)
2491     {
2492       iv = get_iv (data, PHI_RESULT (stmt));
2493
2494       if (iv && !integer_zerop (iv->step))
2495         return;
2496     }
2497
2498   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2499     {
2500       op = USE_FROM_PTR (use_p);
2501
2502       if (TREE_CODE (op) != SSA_NAME)
2503         continue;
2504
2505       iv = get_iv (data, op);
2506       if (!iv)
2507         continue;
2508
2509       if (!find_address_like_use (data, stmt, use_p->use, iv))
2510         find_interesting_uses_op (data, op);
2511     }
2512 }
2513
2514 /* Finds interesting uses of induction variables outside of loops
2515    on loop exit edge EXIT.  */
2516
2517 static void
2518 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2519 {
2520   gphi *phi;
2521   gphi_iterator psi;
2522   tree def;
2523
2524   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2525     {
2526       phi = psi.phi ();
2527       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2528       if (!virtual_operand_p (def))
2529         find_interesting_uses_op (data, def);
2530     }
2531 }
2532
2533 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2534    mode for memory reference represented by USE.  */
2535
2536 static GTY (()) vec<rtx, va_gc> *addr_list;
2537
2538 static bool
2539 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2540 {
2541   rtx reg, addr;
2542   unsigned list_index;
2543   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2544   machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2545
2546   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2547   if (list_index >= vec_safe_length (addr_list))
2548     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2549
2550   addr = (*addr_list)[list_index];
2551   if (!addr)
2552     {
2553       addr_mode = targetm.addr_space.address_mode (as);
2554       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2555       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2556       (*addr_list)[list_index] = addr;
2557     }
2558   else
2559     addr_mode = GET_MODE (addr);
2560
2561   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2562   return (memory_address_addr_space_p (mem_mode, addr, as));
2563 }
2564
2565 /* Comparison function to sort group in ascending order of addr_offset.  */
2566
2567 static int
2568 group_compare_offset (const void *a, const void *b)
2569 {
2570   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2571   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2572
2573   return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2574 }
2575
2576 /* Check if small groups should be split.  Return true if no group
2577    contains more than two uses with distinct addr_offsets.  Return
2578    false otherwise.  We want to split such groups because:
2579
2580      1) Small groups don't have much benefit and may interfer with
2581         general candidate selection.
2582      2) Size for problem with only small groups is usually small and
2583         general algorithm can handle it well.
2584
2585    TODO -- Above claim may not hold when we want to merge memory
2586    accesses with conseuctive addresses.  */
2587
2588 static bool
2589 split_small_address_groups_p (struct ivopts_data *data)
2590 {
2591   unsigned int i, j, distinct = 1;
2592   struct iv_use *pre;
2593   struct iv_group *group;
2594
2595   for (i = 0; i < data->vgroups.length (); i++)
2596     {
2597       group = data->vgroups[i];
2598       if (group->vuses.length () == 1)
2599         continue;
2600
2601       gcc_assert (address_p (group->type));
2602       if (group->vuses.length () == 2)
2603         {
2604           if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2605                                       group->vuses[1]->addr_offset) > 0)
2606             std::swap (group->vuses[0], group->vuses[1]);
2607         }
2608       else
2609         group->vuses.qsort (group_compare_offset);
2610
2611       if (distinct > 2)
2612         continue;
2613
2614       distinct = 1;
2615       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2616         {
2617           if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2618             {
2619               pre = group->vuses[j];
2620               distinct++;
2621             }
2622
2623           if (distinct > 2)
2624             break;
2625         }
2626     }
2627
2628   return (distinct <= 2);
2629 }
2630
2631 /* For each group of address type uses, this function further groups
2632    these uses according to the maximum offset supported by target's
2633    [base + offset] addressing mode.  */
2634
2635 static void
2636 split_address_groups (struct ivopts_data *data)
2637 {
2638   unsigned int i, j;
2639   /* Always split group.  */
2640   bool split_p = split_small_address_groups_p (data);
2641
2642   for (i = 0; i < data->vgroups.length (); i++)
2643     {
2644       struct iv_group *new_group = NULL;
2645       struct iv_group *group = data->vgroups[i];
2646       struct iv_use *use = group->vuses[0];
2647
2648       use->id = 0;
2649       use->group_id = group->id;
2650       if (group->vuses.length () == 1)
2651         continue;
2652
2653       gcc_assert (address_p (use->type));
2654
2655       for (j = 1; j < group->vuses.length ();)
2656         {
2657           struct iv_use *next = group->vuses[j];
2658           poly_int64 offset = next->addr_offset - use->addr_offset;
2659
2660           /* Split group if aksed to, or the offset against the first
2661              use can't fit in offset part of addressing mode.  IV uses
2662              having the same offset are still kept in one group.  */
2663           if (maybe_ne (offset, 0)
2664               && (split_p || !addr_offset_valid_p (use, offset)))
2665             {
2666               if (!new_group)
2667                 new_group = record_group (data, group->type);
2668               group->vuses.ordered_remove (j);
2669               new_group->vuses.safe_push (next);
2670               continue;
2671             }
2672
2673           next->id = j;
2674           next->group_id = group->id;
2675           j++;
2676         }
2677     }
2678 }
2679
2680 /* Finds uses of the induction variables that are interesting.  */
2681
2682 static void
2683 find_interesting_uses (struct ivopts_data *data)
2684 {
2685   basic_block bb;
2686   gimple_stmt_iterator bsi;
2687   basic_block *body = get_loop_body (data->current_loop);
2688   unsigned i;
2689   edge e;
2690
2691   for (i = 0; i < data->current_loop->num_nodes; i++)
2692     {
2693       edge_iterator ei;
2694       bb = body[i];
2695
2696       FOR_EACH_EDGE (e, ei, bb->succs)
2697         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2698             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2699           find_interesting_uses_outside (data, e);
2700
2701       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2702         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2703       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2704         if (!is_gimple_debug (gsi_stmt (bsi)))
2705           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2706     }
2707   free (body);
2708
2709   split_address_groups (data);
2710
2711   if (dump_file && (dump_flags & TDF_DETAILS))
2712     {
2713       fprintf (dump_file, "\n<IV Groups>:\n");
2714       dump_groups (dump_file, data);
2715       fprintf (dump_file, "\n");
2716     }
2717 }
2718
2719 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2720    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2721    we are at the top-level of the processed address.  */
2722
2723 static tree
2724 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2725                 poly_int64 *offset)
2726 {
2727   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2728   enum tree_code code;
2729   tree type, orig_type = TREE_TYPE (expr);
2730   poly_int64 off0, off1;
2731   HOST_WIDE_INT st;
2732   tree orig_expr = expr;
2733
2734   STRIP_NOPS (expr);
2735
2736   type = TREE_TYPE (expr);
2737   code = TREE_CODE (expr);
2738   *offset = 0;
2739
2740   switch (code)
2741     {
2742     case POINTER_PLUS_EXPR:
2743     case PLUS_EXPR:
2744     case MINUS_EXPR:
2745       op0 = TREE_OPERAND (expr, 0);
2746       op1 = TREE_OPERAND (expr, 1);
2747
2748       op0 = strip_offset_1 (op0, false, false, &off0);
2749       op1 = strip_offset_1 (op1, false, false, &off1);
2750
2751       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2752       if (op0 == TREE_OPERAND (expr, 0)
2753           && op1 == TREE_OPERAND (expr, 1))
2754         return orig_expr;
2755
2756       if (integer_zerop (op1))
2757         expr = op0;
2758       else if (integer_zerop (op0))
2759         {
2760           if (code == MINUS_EXPR)
2761             expr = fold_build1 (NEGATE_EXPR, type, op1);
2762           else
2763             expr = op1;
2764         }
2765       else
2766         expr = fold_build2 (code, type, op0, op1);
2767
2768       return fold_convert (orig_type, expr);
2769
2770     case MULT_EXPR:
2771       op1 = TREE_OPERAND (expr, 1);
2772       if (!cst_and_fits_in_hwi (op1))
2773         return orig_expr;
2774
2775       op0 = TREE_OPERAND (expr, 0);
2776       op0 = strip_offset_1 (op0, false, false, &off0);
2777       if (op0 == TREE_OPERAND (expr, 0))
2778         return orig_expr;
2779
2780       *offset = off0 * int_cst_value (op1);
2781       if (integer_zerop (op0))
2782         expr = op0;
2783       else
2784         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2785
2786       return fold_convert (orig_type, expr);
2787
2788     case ARRAY_REF:
2789     case ARRAY_RANGE_REF:
2790       if (!inside_addr)
2791         return orig_expr;
2792
2793       step = array_ref_element_size (expr);
2794       if (!cst_and_fits_in_hwi (step))
2795         break;
2796
2797       st = int_cst_value (step);
2798       op1 = TREE_OPERAND (expr, 1);
2799       op1 = strip_offset_1 (op1, false, false, &off1);
2800       *offset = off1 * st;
2801
2802       if (top_compref
2803           && integer_zerop (op1))
2804         {
2805           /* Strip the component reference completely.  */
2806           op0 = TREE_OPERAND (expr, 0);
2807           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2808           *offset += off0;
2809           return op0;
2810         }
2811       break;
2812
2813     case COMPONENT_REF:
2814       {
2815         tree field;
2816
2817         if (!inside_addr)
2818           return orig_expr;
2819
2820         tmp = component_ref_field_offset (expr);
2821         field = TREE_OPERAND (expr, 1);
2822         if (top_compref
2823             && cst_and_fits_in_hwi (tmp)
2824             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2825           {
2826             HOST_WIDE_INT boffset, abs_off;
2827
2828             /* Strip the component reference completely.  */
2829             op0 = TREE_OPERAND (expr, 0);
2830             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2831             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2832             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2833             if (boffset < 0)
2834               abs_off = -abs_off;
2835
2836             *offset = off0 + int_cst_value (tmp) + abs_off;
2837             return op0;
2838           }
2839       }
2840       break;
2841
2842     case ADDR_EXPR:
2843       op0 = TREE_OPERAND (expr, 0);
2844       op0 = strip_offset_1 (op0, true, true, &off0);
2845       *offset += off0;
2846
2847       if (op0 == TREE_OPERAND (expr, 0))
2848         return orig_expr;
2849
2850       expr = build_fold_addr_expr (op0);
2851       return fold_convert (orig_type, expr);
2852
2853     case MEM_REF:
2854       /* ???  Offset operand?  */
2855       inside_addr = false;
2856       break;
2857
2858     default:
2859       if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2860         return build_int_cst (orig_type, 0);
2861       return orig_expr;
2862     }
2863
2864   /* Default handling of expressions for that we want to recurse into
2865      the first operand.  */
2866   op0 = TREE_OPERAND (expr, 0);
2867   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2868   *offset += off0;
2869
2870   if (op0 == TREE_OPERAND (expr, 0)
2871       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2872     return orig_expr;
2873
2874   expr = copy_node (expr);
2875   TREE_OPERAND (expr, 0) = op0;
2876   if (op1)
2877     TREE_OPERAND (expr, 1) = op1;
2878
2879   /* Inside address, we might strip the top level component references,
2880      thus changing type of the expression.  Handling of ADDR_EXPR
2881      will fix that.  */
2882   expr = fold_convert (orig_type, expr);
2883
2884   return expr;
2885 }
2886
2887 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2888
2889 tree
2890 strip_offset (tree expr, poly_uint64_pod *offset)
2891 {
2892   poly_int64 off;
2893   tree core = strip_offset_1 (expr, false, false, &off);
2894   *offset = off;
2895   return core;
2896 }
2897
2898 /* Returns variant of TYPE that can be used as base for different uses.
2899    We return unsigned type with the same precision, which avoids problems
2900    with overflows.  */
2901
2902 static tree
2903 generic_type_for (tree type)
2904 {
2905   if (POINTER_TYPE_P (type))
2906     return unsigned_type_for (type);
2907
2908   if (TYPE_UNSIGNED (type))
2909     return type;
2910
2911   return unsigned_type_for (type);
2912 }
2913
2914 /* Private data for walk_tree.  */
2915
2916 struct walk_tree_data
2917 {
2918   bitmap *inv_vars;
2919   struct ivopts_data *idata;
2920 };
2921
2922 /* Callback function for walk_tree, it records invariants and symbol
2923    reference in *EXPR_P.  DATA is the structure storing result info.  */
2924
2925 static tree
2926 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2927 {
2928   tree op = *expr_p;
2929   struct version_info *info;
2930   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2931
2932   if (TREE_CODE (op) != SSA_NAME)
2933     return NULL_TREE;
2934
2935   info = name_info (wdata->idata, op);
2936   /* Because we expand simple operations when finding IVs, loop invariant
2937      variable that isn't referred by the original loop could be used now.
2938      Record such invariant variables here.  */
2939   if (!info->iv)
2940     {
2941       struct ivopts_data *idata = wdata->idata;
2942       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2943
2944       if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2945         {
2946           set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
2947           record_invariant (idata, op, false);
2948         }
2949     }
2950   if (!info->inv_id || info->has_nonlin_use)
2951     return NULL_TREE;
2952
2953   if (!*wdata->inv_vars)
2954     *wdata->inv_vars = BITMAP_ALLOC (NULL);
2955   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
2956
2957   return NULL_TREE;
2958 }
2959
2960 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap to that we should
2961    store it.  */
2962
2963 static inline void
2964 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
2965 {
2966   struct walk_tree_data wdata;
2967
2968   if (!inv_vars)
2969     return;
2970
2971   wdata.idata = data;
2972   wdata.inv_vars = inv_vars;
2973   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
2974 }
2975
2976 /* Get entry from invariant expr hash table for INV_EXPR.  New entry
2977    will be recorded if it doesn't exist yet.  Given below two exprs:
2978      inv_expr + cst1, inv_expr + cst2
2979    It's hard to make decision whether constant part should be stripped
2980    or not.  We choose to not strip based on below facts:
2981      1) We need to count ADD cost for constant part if it's stripped,
2982         which isn't always trivial where this functions is called.
2983      2) Stripping constant away may be conflict with following loop
2984         invariant hoisting pass.
2985      3) Not stripping constant away results in more invariant exprs,
2986         which usually leads to decision preferring lower reg pressure.  */
2987
2988 static iv_inv_expr_ent *
2989 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
2990 {
2991   STRIP_NOPS (inv_expr);
2992
2993   if (poly_int_tree_p (inv_expr)
2994       || TREE_CODE (inv_expr) == SSA_NAME)
2995     return NULL;
2996
2997   /* Don't strip constant part away as we used to.  */
2998
2999   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
3000   struct iv_inv_expr_ent ent;
3001   ent.expr = inv_expr;
3002   ent.hash = iterative_hash_expr (inv_expr, 0);
3003   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3004
3005   if (!*slot)
3006     {
3007       *slot = XNEW (struct iv_inv_expr_ent);
3008       (*slot)->expr = inv_expr;
3009       (*slot)->hash = ent.hash;
3010       (*slot)->id = ++data->max_inv_expr_id;
3011     }
3012
3013   return *slot;
3014 }
3015
3016 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3017    position to POS.  If USE is not NULL, the candidate is set as related to
3018    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
3019    replacement of the final value of the iv by a direct computation.  */
3020
3021 static struct iv_cand *
3022 add_candidate_1 (struct ivopts_data *data,
3023                  tree base, tree step, bool important, enum iv_position pos,
3024                  struct iv_use *use, gimple *incremented_at,
3025                  struct iv *orig_iv = NULL)
3026 {
3027   unsigned i;
3028   struct iv_cand *cand = NULL;
3029   tree type, orig_type;
3030
3031   gcc_assert (base && step);
3032
3033   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3034      live, but the ivopts code may replace a real pointer with one
3035      pointing before or after the memory block that is then adjusted
3036      into the memory block during the loop.  FIXME: It would likely be
3037      better to actually force the pointer live and still use ivopts;
3038      for example, it would be enough to write the pointer into memory
3039      and keep it there until after the loop.  */
3040   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3041     return NULL;
3042
3043   /* For non-original variables, make sure their values are computed in a type
3044      that does not invoke undefined behavior on overflows (since in general,
3045      we cannot prove that these induction variables are non-wrapping).  */
3046   if (pos != IP_ORIGINAL)
3047     {
3048       orig_type = TREE_TYPE (base);
3049       type = generic_type_for (orig_type);
3050       if (type != orig_type)
3051         {
3052           base = fold_convert (type, base);
3053           step = fold_convert (type, step);
3054         }
3055     }
3056
3057   for (i = 0; i < data->vcands.length (); i++)
3058     {
3059       cand = data->vcands[i];
3060
3061       if (cand->pos != pos)
3062         continue;
3063
3064       if (cand->incremented_at != incremented_at
3065           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3066               && cand->ainc_use != use))
3067         continue;
3068
3069       if (operand_equal_p (base, cand->iv->base, 0)
3070           && operand_equal_p (step, cand->iv->step, 0)
3071           && (TYPE_PRECISION (TREE_TYPE (base))
3072               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3073         break;
3074     }
3075
3076   if (i == data->vcands.length ())
3077     {
3078       cand = XCNEW (struct iv_cand);
3079       cand->id = i;
3080       cand->iv = alloc_iv (data, base, step);
3081       cand->pos = pos;
3082       if (pos != IP_ORIGINAL)
3083         {
3084           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3085           cand->var_after = cand->var_before;
3086         }
3087       cand->important = important;
3088       cand->incremented_at = incremented_at;
3089       data->vcands.safe_push (cand);
3090
3091       if (!poly_int_tree_p (step))
3092         {
3093           find_inv_vars (data, &step, &cand->inv_vars);
3094
3095           iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3096           /* Share bitmap between inv_vars and inv_exprs for cand.  */
3097           if (inv_expr != NULL)
3098             {
3099               cand->inv_exprs = cand->inv_vars;
3100               cand->inv_vars = NULL;
3101               if (cand->inv_exprs)
3102                 bitmap_clear (cand->inv_exprs);
3103               else
3104                 cand->inv_exprs = BITMAP_ALLOC (NULL);
3105
3106               bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3107             }
3108         }
3109
3110       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3111         cand->ainc_use = use;
3112       else
3113         cand->ainc_use = NULL;
3114
3115       cand->orig_iv = orig_iv;
3116       if (dump_file && (dump_flags & TDF_DETAILS))
3117         dump_cand (dump_file, cand);
3118     }
3119
3120   cand->important |= important;
3121
3122   /* Relate candidate to the group for which it is added.  */
3123   if (use)
3124     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3125
3126   return cand;
3127 }
3128
3129 /* Returns true if incrementing the induction variable at the end of the LOOP
3130    is allowed.
3131
3132    The purpose is to avoid splitting latch edge with a biv increment, thus
3133    creating a jump, possibly confusing other optimization passes and leaving
3134    less freedom to scheduler.  So we allow IP_END only if IP_NORMAL is not
3135    available (so we do not have a better alternative), or if the latch edge
3136    is already nonempty.  */
3137
3138 static bool
3139 allow_ip_end_pos_p (struct loop *loop)
3140 {
3141   if (!ip_normal_pos (loop))
3142     return true;
3143
3144   if (!empty_block_p (ip_end_pos (loop)))
3145     return true;
3146
3147   return false;
3148 }
3149
3150 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3151    Important field is set to IMPORTANT.  */
3152
3153 static void
3154 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3155                         bool important, struct iv_use *use)
3156 {
3157   basic_block use_bb = gimple_bb (use->stmt);
3158   machine_mode mem_mode;
3159   unsigned HOST_WIDE_INT cstepi;
3160
3161   /* If we insert the increment in any position other than the standard
3162      ones, we must ensure that it is incremented once per iteration.
3163      It must not be in an inner nested loop, or one side of an if
3164      statement.  */
3165   if (use_bb->loop_father != data->current_loop
3166       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3167       || stmt_can_throw_internal (cfun, use->stmt)
3168       || !cst_and_fits_in_hwi (step))
3169     return;
3170
3171   cstepi = int_cst_value (step);
3172
3173   mem_mode = TYPE_MODE (use->mem_type);
3174   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3175         || USE_STORE_PRE_INCREMENT (mem_mode))
3176        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3177       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3178            || USE_STORE_PRE_DECREMENT (mem_mode))
3179           && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3180     {
3181       enum tree_code code = MINUS_EXPR;
3182       tree new_base;
3183       tree new_step = step;
3184
3185       if (POINTER_TYPE_P (TREE_TYPE (base)))
3186         {
3187           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3188           code = POINTER_PLUS_EXPR;
3189         }
3190       else
3191         new_step = fold_convert (TREE_TYPE (base), new_step);
3192       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3193       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3194                        use->stmt);
3195     }
3196   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3197         || USE_STORE_POST_INCREMENT (mem_mode))
3198        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3199       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3200            || USE_STORE_POST_DECREMENT (mem_mode))
3201           && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3202     {
3203       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3204                        use->stmt);
3205     }
3206 }
3207
3208 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3209    position to POS.  If USE is not NULL, the candidate is set as related to
3210    it.  The candidate computation is scheduled before exit condition and at
3211    the end of loop.  */
3212
3213 static void
3214 add_candidate (struct ivopts_data *data,
3215                tree base, tree step, bool important, struct iv_use *use,
3216                struct iv *orig_iv = NULL)
3217 {
3218   if (ip_normal_pos (data->current_loop))
3219     add_candidate_1 (data, base, step, important,
3220                      IP_NORMAL, use, NULL, orig_iv);
3221   if (ip_end_pos (data->current_loop)
3222       && allow_ip_end_pos_p (data->current_loop))
3223     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3224 }
3225
3226 /* Adds standard iv candidates.  */
3227
3228 static void
3229 add_standard_iv_candidates (struct ivopts_data *data)
3230 {
3231   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3232
3233   /* The same for a double-integer type if it is still fast enough.  */
3234   if (TYPE_PRECISION
3235         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3236       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3237     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3238                    build_int_cst (long_integer_type_node, 1), true, NULL);
3239
3240   /* The same for a double-integer type if it is still fast enough.  */
3241   if (TYPE_PRECISION
3242         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3243       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3244     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3245                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
3246 }
3247
3248
3249 /* Adds candidates bases on the old induction variable IV.  */
3250
3251 static void
3252 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3253 {
3254   gimple *phi;
3255   tree def;
3256   struct iv_cand *cand;
3257
3258   /* Check if this biv is used in address type use.  */
3259   if (iv->no_overflow  && iv->have_address_use
3260       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3261       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3262     {
3263       tree base = fold_convert (sizetype, iv->base);
3264       tree step = fold_convert (sizetype, iv->step);
3265
3266       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3267       add_candidate (data, base, step, true, NULL, iv);
3268       /* Add iv cand of the original type only if it has nonlinear use.  */
3269       if (iv->nonlin_use)
3270         add_candidate (data, iv->base, iv->step, true, NULL);
3271     }
3272   else
3273     add_candidate (data, iv->base, iv->step, true, NULL);
3274
3275   /* The same, but with initial value zero.  */
3276   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3277     add_candidate (data, size_int (0), iv->step, true, NULL);
3278   else
3279     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3280                    iv->step, true, NULL);
3281
3282   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3283   if (gimple_code (phi) == GIMPLE_PHI)
3284     {
3285       /* Additionally record the possibility of leaving the original iv
3286          untouched.  */
3287       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3288       /* Don't add candidate if it's from another PHI node because
3289          it's an affine iv appearing in the form of PEELED_CHREC.  */
3290       phi = SSA_NAME_DEF_STMT (def);
3291       if (gimple_code (phi) != GIMPLE_PHI)
3292         {
3293           cand = add_candidate_1 (data,
3294                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
3295                                   SSA_NAME_DEF_STMT (def));
3296           if (cand)
3297             {
3298               cand->var_before = iv->ssa_name;
3299               cand->var_after = def;
3300             }
3301         }
3302       else
3303         gcc_assert (gimple_bb (phi) == data->current_loop->header);
3304     }
3305 }
3306
3307 /* Adds candidates based on the old induction variables.  */
3308
3309 static void
3310 add_iv_candidate_for_bivs (struct ivopts_data *data)
3311 {
3312   unsigned i;
3313   struct iv *iv;
3314   bitmap_iterator bi;
3315
3316   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3317     {
3318       iv = ver_info (data, i)->iv;
3319       if (iv && iv->biv_p && !integer_zerop (iv->step))
3320         add_iv_candidate_for_biv (data, iv);
3321     }
3322 }
3323
3324 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3325
3326 static void
3327 record_common_cand (struct ivopts_data *data, tree base,
3328                     tree step, struct iv_use *use)
3329 {
3330   struct iv_common_cand ent;
3331   struct iv_common_cand **slot;
3332
3333   ent.base = base;
3334   ent.step = step;
3335   ent.hash = iterative_hash_expr (base, 0);
3336   ent.hash = iterative_hash_expr (step, ent.hash);
3337
3338   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3339   if (*slot == NULL)
3340     {
3341       *slot = new iv_common_cand ();
3342       (*slot)->base = base;
3343       (*slot)->step = step;
3344       (*slot)->uses.create (8);
3345       (*slot)->hash = ent.hash;
3346       data->iv_common_cands.safe_push ((*slot));
3347     }
3348
3349   gcc_assert (use != NULL);
3350   (*slot)->uses.safe_push (use);
3351   return;
3352 }
3353
3354 /* Comparison function used to sort common candidates.  */
3355
3356 static int
3357 common_cand_cmp (const void *p1, const void *p2)
3358 {
3359   unsigned n1, n2;
3360   const struct iv_common_cand *const *const ccand1
3361     = (const struct iv_common_cand *const *)p1;
3362   const struct iv_common_cand *const *const ccand2
3363     = (const struct iv_common_cand *const *)p2;
3364
3365   n1 = (*ccand1)->uses.length ();
3366   n2 = (*ccand2)->uses.length ();
3367   return n2 - n1;
3368 }
3369
3370 /* Adds IV candidates based on common candidated recorded.  */
3371
3372 static void
3373 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3374 {
3375   unsigned i, j;
3376   struct iv_cand *cand_1, *cand_2;
3377
3378   data->iv_common_cands.qsort (common_cand_cmp);
3379   for (i = 0; i < data->iv_common_cands.length (); i++)
3380     {
3381       struct iv_common_cand *ptr = data->iv_common_cands[i];
3382
3383       /* Only add IV candidate if it's derived from multiple uses.  */
3384       if (ptr->uses.length () <= 1)
3385         break;
3386
3387       cand_1 = NULL;
3388       cand_2 = NULL;
3389       if (ip_normal_pos (data->current_loop))
3390         cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3391                                   false, IP_NORMAL, NULL, NULL);
3392
3393       if (ip_end_pos (data->current_loop)
3394           && allow_ip_end_pos_p (data->current_loop))
3395         cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3396                                   false, IP_END, NULL, NULL);
3397
3398       /* Bind deriving uses and the new candidates.  */
3399       for (j = 0; j < ptr->uses.length (); j++)
3400         {
3401           struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3402           if (cand_1)
3403             bitmap_set_bit (group->related_cands, cand_1->id);
3404           if (cand_2)
3405             bitmap_set_bit (group->related_cands, cand_2->id);
3406         }
3407     }
3408
3409   /* Release data since it is useless from this point.  */
3410   data->iv_common_cand_tab->empty ();
3411   data->iv_common_cands.truncate (0);
3412 }
3413
3414 /* Adds candidates based on the value of USE's iv.  */
3415
3416 static void
3417 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3418 {
3419   poly_uint64 offset;
3420   tree base;
3421   tree basetype;
3422   struct iv *iv = use->iv;
3423
3424   add_candidate (data, iv->base, iv->step, false, use);
3425
3426   /* Record common candidate for use in case it can be shared by others.  */
3427   record_common_cand (data, iv->base, iv->step, use);
3428
3429   /* Record common candidate with initial value zero.  */
3430   basetype = TREE_TYPE (iv->base);
3431   if (POINTER_TYPE_P (basetype))
3432     basetype = sizetype;
3433   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3434
3435   /* Compare the cost of an address with an unscaled index with the cost of
3436     an address with a scaled index and add candidate if useful.  */
3437   poly_int64 step;
3438   if (use != NULL
3439       && poly_int_tree_p (iv->step, &step)
3440       && address_p (use->type))
3441     {
3442       poly_int64 new_step;
3443       unsigned int fact = preferred_mem_scale_factor
3444         (use->iv->base,
3445          TYPE_MODE (use->mem_type),
3446          optimize_loop_for_speed_p (data->current_loop));
3447
3448       if (fact != 1
3449           && multiple_p (step, fact, &new_step))
3450         add_candidate (data, size_int (0),
3451                        wide_int_to_tree (sizetype, new_step),
3452                        true, NULL);
3453     }
3454
3455   /* Record common candidate with constant offset stripped in base.
3456      Like the use itself, we also add candidate directly for it.  */
3457   base = strip_offset (iv->base, &offset);
3458   if (maybe_ne (offset, 0U) || base != iv->base)
3459     {
3460       record_common_cand (data, base, iv->step, use);
3461       add_candidate (data, base, iv->step, false, use);
3462     }
3463
3464   /* Record common candidate with base_object removed in base.  */
3465   base = iv->base;
3466   STRIP_NOPS (base);
3467   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3468     {
3469       tree step = iv->step;
3470
3471       STRIP_NOPS (step);
3472       base = TREE_OPERAND (base, 1);
3473       step = fold_convert (sizetype, step);
3474       record_common_cand (data, base, step, use);
3475       /* Also record common candidate with offset stripped.  */
3476       base = strip_offset (base, &offset);
3477       if (maybe_ne (offset, 0U))
3478         record_common_cand (data, base, step, use);
3479     }
3480
3481   /* At last, add auto-incremental candidates.  Make such variables
3482      important since other iv uses with same base object may be based
3483      on it.  */
3484   if (use != NULL && address_p (use->type))
3485     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3486 }
3487
3488 /* Adds candidates based on the uses.  */
3489
3490 static void
3491 add_iv_candidate_for_groups (struct ivopts_data *data)
3492 {
3493   unsigned i;
3494
3495   /* Only add candidate for the first use in group.  */
3496   for (i = 0; i < data->vgroups.length (); i++)
3497     {
3498       struct iv_group *group = data->vgroups[i];
3499
3500       gcc_assert (group->vuses[0] != NULL);
3501       add_iv_candidate_for_use (data, group->vuses[0]);
3502     }
3503   add_iv_candidate_derived_from_uses (data);
3504 }
3505
3506 /* Record important candidates and add them to related_cands bitmaps.  */
3507
3508 static void
3509 record_important_candidates (struct ivopts_data *data)
3510 {
3511   unsigned i;
3512   struct iv_group *group;
3513
3514   for (i = 0; i < data->vcands.length (); i++)
3515     {
3516       struct iv_cand *cand = data->vcands[i];
3517
3518       if (cand->important)
3519         bitmap_set_bit (data->important_candidates, i);
3520     }
3521
3522   data->consider_all_candidates = (data->vcands.length ()
3523                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
3524
3525   /* Add important candidates to groups' related_cands bitmaps.  */
3526   for (i = 0; i < data->vgroups.length (); i++)
3527     {
3528       group = data->vgroups[i];
3529       bitmap_ior_into (group->related_cands, data->important_candidates);
3530     }
3531 }
3532
3533 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3534    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3535    we allocate a simple list to every use.  */
3536
3537 static void
3538 alloc_use_cost_map (struct ivopts_data *data)
3539 {
3540   unsigned i, size, s;
3541
3542   for (i = 0; i < data->vgroups.length (); i++)
3543     {
3544       struct iv_group *group = data->vgroups[i];
3545
3546       if (data->consider_all_candidates)
3547         size = data->vcands.length ();
3548       else
3549         {
3550           s = bitmap_count_bits (group->related_cands);
3551
3552           /* Round up to the power of two, so that moduling by it is fast.  */
3553           size = s ? (1 << ceil_log2 (s)) : 1;
3554         }
3555
3556       group->n_map_members = size;
3557       group->cost_map = XCNEWVEC (struct cost_pair, size);
3558     }
3559 }
3560
3561 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3562    on invariants INV_VARS and that the value used in expressing it is
3563    VALUE, and in case of iv elimination the comparison operator is COMP.  */
3564
3565 static void
3566 set_group_iv_cost (struct ivopts_data *data,
3567                    struct iv_group *group, struct iv_cand *cand,
3568                    comp_cost cost, bitmap inv_vars, tree value,
3569                    enum tree_code comp, bitmap inv_exprs)
3570 {
3571   unsigned i, s;
3572
3573   if (cost.infinite_cost_p ())
3574     {
3575       BITMAP_FREE (inv_vars);
3576       BITMAP_FREE (inv_exprs);
3577       return;
3578     }
3579
3580   if (data->consider_all_candidates)
3581     {
3582       group->cost_map[cand->id].cand = cand;
3583       group->cost_map[cand->id].cost = cost;
3584       group->cost_map[cand->id].inv_vars = inv_vars;
3585       group->cost_map[cand->id].inv_exprs = inv_exprs;
3586       group->cost_map[cand->id].value = value;
3587       group->cost_map[cand->id].comp = comp;
3588       return;
3589     }
3590
3591   /* n_map_members is a power of two, so this computes modulo.  */
3592   s = cand->id & (group->n_map_members - 1);
3593   for (i = s; i < group->n_map_members; i++)
3594     if (!group->cost_map[i].cand)
3595       goto found;
3596   for (i = 0; i < s; i++)
3597     if (!group->cost_map[i].cand)
3598       goto found;
3599
3600   gcc_unreachable ();
3601
3602 found:
3603   group->cost_map[i].cand = cand;
3604   group->cost_map[i].cost = cost;
3605   group->cost_map[i].inv_vars = inv_vars;
3606   group->cost_map[i].inv_exprs = inv_exprs;
3607   group->cost_map[i].value = value;
3608   group->cost_map[i].comp = comp;
3609 }
3610
3611 /* Gets cost of (GROUP, CAND) pair.  */
3612
3613 static struct cost_pair *
3614 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3615                    struct iv_cand *cand)
3616 {
3617   unsigned i, s;
3618   struct cost_pair *ret;
3619
3620   if (!cand)
3621     return NULL;
3622
3623   if (data->consider_all_candidates)
3624     {
3625       ret = group->cost_map + cand->id;
3626       if (!ret->cand)
3627         return NULL;
3628
3629       return ret;
3630     }
3631
3632   /* n_map_members is a power of two, so this computes modulo.  */
3633   s = cand->id & (group->n_map_members - 1);
3634   for (i = s; i < group->n_map_members; i++)
3635     if (group->cost_map[i].cand == cand)
3636       return group->cost_map + i;
3637     else if (group->cost_map[i].cand == NULL)
3638       return NULL;
3639   for (i = 0; i < s; i++)
3640     if (group->cost_map[i].cand == cand)
3641       return group->cost_map + i;
3642     else if (group->cost_map[i].cand == NULL)
3643       return NULL;
3644
3645   return NULL;
3646 }
3647
3648 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3649 static rtx
3650 produce_memory_decl_rtl (tree obj, int *regno)
3651 {
3652   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3653   machine_mode address_mode = targetm.addr_space.address_mode (as);
3654   rtx x;
3655
3656   gcc_assert (obj);
3657   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3658     {
3659       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3660       x = gen_rtx_SYMBOL_REF (address_mode, name);
3661       SET_SYMBOL_REF_DECL (x, obj);
3662       x = gen_rtx_MEM (DECL_MODE (obj), x);
3663       set_mem_addr_space (x, as);
3664       targetm.encode_section_info (obj, x, true);
3665     }
3666   else
3667     {
3668       x = gen_raw_REG (address_mode, (*regno)++);
3669       x = gen_rtx_MEM (DECL_MODE (obj), x);
3670       set_mem_addr_space (x, as);
3671     }
3672
3673   return x;
3674 }
3675
3676 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3677    walk_tree.  DATA contains the actual fake register number.  */
3678
3679 static tree
3680 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3681 {
3682   tree obj = NULL_TREE;
3683   rtx x = NULL_RTX;
3684   int *regno = (int *) data;
3685
3686   switch (TREE_CODE (*expr_p))
3687     {
3688     case ADDR_EXPR:
3689       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3690            handled_component_p (*expr_p);
3691            expr_p = &TREE_OPERAND (*expr_p, 0))
3692         continue;
3693       obj = *expr_p;
3694       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3695         x = produce_memory_decl_rtl (obj, regno);
3696       break;
3697
3698     case SSA_NAME:
3699       *ws = 0;
3700       obj = SSA_NAME_VAR (*expr_p);
3701       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3702       if (!obj)
3703         return NULL_TREE;
3704       if (!DECL_RTL_SET_P (obj))
3705         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3706       break;
3707
3708     case VAR_DECL:
3709     case PARM_DECL:
3710     case RESULT_DECL:
3711       *ws = 0;
3712       obj = *expr_p;
3713
3714       if (DECL_RTL_SET_P (obj))
3715         break;
3716
3717       if (DECL_MODE (obj) == BLKmode)
3718         x = produce_memory_decl_rtl (obj, regno);
3719       else
3720         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3721
3722       break;
3723
3724     default:
3725       break;
3726     }
3727
3728   if (x)
3729     {
3730       decl_rtl_to_reset.safe_push (obj);
3731       SET_DECL_RTL (obj, x);
3732     }
3733
3734   return NULL_TREE;
3735 }
3736
3737 /* Predict whether the given loop will be transformed in the RTL
3738    doloop_optimize pass.  Attempt to duplicate some doloop_optimize checks.
3739    This is only for target independent checks, see targetm.predict_doloop_p
3740    for the target dependent ones.
3741
3742    Note that according to some initial investigation, some checks like costly
3743    niter check and invalid stmt scanning don't have much gains among general
3744    cases, so keep this as simple as possible first.
3745
3746    Some RTL specific checks seems unable to be checked in gimple, if any new
3747    checks or easy checks _are_ missing here, please add them.  */
3748
3749 static bool ATTRIBUTE_UNUSED
3750 generic_predict_doloop_p (struct ivopts_data *data)
3751 {
3752   struct loop *loop = data->current_loop;
3753
3754   /* Call target hook for target dependent checks.  */
3755   if (!targetm.predict_doloop_p (loop))
3756     {
3757       if (dump_file && (dump_flags & TDF_DETAILS))
3758         fprintf (dump_file, "Predict doloop failure due to"
3759                             " target specific checks.\n");
3760       return false;
3761     }
3762
3763   /* Similar to doloop_optimize, check iteration description to know it's
3764      suitable or not.  Keep it as simple as possible, feel free to extend it
3765      if you find any multiple exits cases matter.  */
3766   edge exit = single_dom_exit (loop);
3767   struct tree_niter_desc *niter_desc;
3768   if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3769     {
3770       if (dump_file && (dump_flags & TDF_DETAILS))
3771         fprintf (dump_file, "Predict doloop failure due to"
3772                             " unexpected niters.\n");
3773       return false;
3774     }
3775
3776   /* Similar to doloop_optimize, check whether iteration count too small
3777      and not profitable.  */
3778   HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3779   if (est_niter == -1)
3780     est_niter = get_likely_max_loop_iterations_int (loop);
3781   if (est_niter >= 0 && est_niter < 3)
3782     {
3783       if (dump_file && (dump_flags & TDF_DETAILS))
3784         fprintf (dump_file,
3785                  "Predict doloop failure due to"
3786                  " too few iterations (%u).\n",
3787                  (unsigned int) est_niter);
3788       return false;
3789     }
3790
3791   return true;
3792 }
3793
3794 /* Determines cost of the computation of EXPR.  */
3795
3796 static unsigned
3797 computation_cost (tree expr, bool speed)
3798 {
3799   rtx_insn *seq;
3800   rtx rslt;
3801   tree type = TREE_TYPE (expr);
3802   unsigned cost;
3803   /* Avoid using hard regs in ways which may be unsupported.  */
3804   int regno = LAST_VIRTUAL_REGISTER + 1;
3805   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3806   enum node_frequency real_frequency = node->frequency;
3807
3808   node->frequency = NODE_FREQUENCY_NORMAL;
3809   crtl->maybe_hot_insn_p = speed;
3810   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3811   start_sequence ();
3812   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3813   seq = get_insns ();
3814   end_sequence ();
3815   default_rtl_profile ();
3816   node->frequency = real_frequency;
3817
3818   cost = seq_cost (seq, speed);
3819   if (MEM_P (rslt))
3820     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3821                           TYPE_ADDR_SPACE (type), speed);
3822   else if (!REG_P (rslt))
3823     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3824
3825   return cost;
3826 }
3827
3828 /* Returns variable containing the value of candidate CAND at statement AT.  */
3829
3830 static tree
3831 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3832 {
3833   if (stmt_after_increment (loop, cand, stmt))
3834     return cand->var_after;
3835   else
3836     return cand->var_before;
3837 }
3838
3839 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3840    same precision that is at least as wide as the precision of TYPE, stores
3841    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3842    type of A and B.  */
3843
3844 static tree
3845 determine_common_wider_type (tree *a, tree *b)
3846 {
3847   tree wider_type = NULL;
3848   tree suba, subb;
3849   tree atype = TREE_TYPE (*a);
3850
3851   if (CONVERT_EXPR_P (*a))
3852     {
3853       suba = TREE_OPERAND (*a, 0);
3854       wider_type = TREE_TYPE (suba);
3855       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3856         return atype;
3857     }
3858   else
3859     return atype;
3860
3861   if (CONVERT_EXPR_P (*b))
3862     {
3863       subb = TREE_OPERAND (*b, 0);
3864       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3865         return atype;
3866     }
3867   else
3868     return atype;
3869
3870   *a = suba;
3871   *b = subb;
3872   return wider_type;
3873 }
3874
3875 /* Determines the expression by that USE is expressed from induction variable
3876    CAND at statement AT in LOOP.  The expression is stored in two parts in a
3877    decomposed form.  The invariant part is stored in AFF_INV; while variant
3878    part in AFF_VAR.  Store ratio of CAND.step over USE.step in PRAT if it's
3879    non-null.  Returns false if USE cannot be expressed using CAND.  */
3880
3881 static bool
3882 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3883                        struct iv_cand *cand, struct aff_tree *aff_inv,
3884                        struct aff_tree *aff_var, widest_int *prat = NULL)
3885 {
3886   tree ubase = use->iv->base, ustep = use->iv->step;
3887   tree cbase = cand->iv->base, cstep = cand->iv->step;
3888   tree common_type, uutype, var, cstep_common;
3889   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3890   aff_tree aff_cbase;
3891   widest_int rat;
3892
3893   /* We must have a precision to express the values of use.  */
3894   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3895     return false;
3896
3897   var = var_at_stmt (loop, cand, at);
3898   uutype = unsigned_type_for (utype);
3899
3900   /* If the conversion is not noop, perform it.  */
3901   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3902     {
3903       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3904           && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3905         {
3906           tree inner_base, inner_step, inner_type;
3907           inner_base = TREE_OPERAND (cbase, 0);
3908           if (CONVERT_EXPR_P (cstep))
3909             inner_step = TREE_OPERAND (cstep, 0);
3910           else
3911             inner_step = cstep;
3912
3913           inner_type = TREE_TYPE (inner_base);
3914           /* If candidate is added from a biv whose type is smaller than
3915              ctype, we know both candidate and the biv won't overflow.
3916              In this case, it's safe to skip the convertion in candidate.
3917              As an example, (unsigned short)((unsigned long)A) equals to
3918              (unsigned short)A, if A has a type no larger than short.  */
3919           if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3920             {
3921               cbase = inner_base;
3922               cstep = inner_step;
3923             }
3924         }
3925       cbase = fold_convert (uutype, cbase);
3926       cstep = fold_convert (uutype, cstep);
3927       var = fold_convert (uutype, var);
3928     }
3929
3930   /* Ratio is 1 when computing the value of biv cand by itself.
3931      We can't rely on constant_multiple_of in this case because the
3932      use is created after the original biv is selected.  The call
3933      could fail because of inconsistent fold behavior.  See PR68021
3934      for more information.  */
3935   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3936     {
3937       gcc_assert (is_gimple_assign (use->stmt));
3938       gcc_assert (use->iv->ssa_name == cand->var_after);
3939       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3940       rat = 1;
3941     }
3942   else if (!constant_multiple_of (ustep, cstep, &rat))
3943     return false;
3944
3945   if (prat)
3946     *prat = rat;
3947
3948   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3949      type, we achieve better folding by computing their difference in this
3950      wider type, and cast the result to UUTYPE.  We do not need to worry about
3951      overflows, as all the arithmetics will in the end be performed in UUTYPE
3952      anyway.  */
3953   common_type = determine_common_wider_type (&ubase, &cbase);
3954
3955   /* use = ubase - ratio * cbase + ratio * var.  */
3956   tree_to_aff_combination (ubase, common_type, aff_inv);
3957   tree_to_aff_combination (cbase, common_type, &aff_cbase);
3958   tree_to_aff_combination (var, uutype, aff_var);
3959
3960   /* We need to shift the value if we are after the increment.  */
3961   if (stmt_after_increment (loop, cand, at))
3962     {
3963       aff_tree cstep_aff;
3964
3965       if (common_type != uutype)
3966         cstep_common = fold_convert (common_type, cstep);
3967       else
3968         cstep_common = cstep;
3969
3970       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3971       aff_combination_add (&aff_cbase, &cstep_aff);
3972     }
3973
3974   aff_combination_scale (&aff_cbase, -rat);
3975   aff_combination_add (aff_inv, &aff_cbase);
3976   if (common_type != uutype)
3977     aff_combination_convert (aff_inv, uutype);
3978
3979   aff_combination_scale (aff_var, rat);
3980   return true;
3981 }
3982
3983 /* Determines the expression by that USE is expressed from induction variable
3984    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3985    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3986
3987 static bool
3988 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3989                      struct iv_cand *cand, struct aff_tree *aff)
3990 {
3991   aff_tree aff_var;
3992
3993   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3994     return false;
3995
3996   aff_combination_add (aff, &aff_var);
3997   return true;
3998 }
3999
4000 /* Return the type of USE.  */
4001
4002 static tree
4003 get_use_type (struct iv_use *use)
4004 {
4005   tree base_type = TREE_TYPE (use->iv->base);
4006   tree type;
4007
4008   if (use->type == USE_REF_ADDRESS)
4009     {
4010       /* The base_type may be a void pointer.  Create a pointer type based on
4011          the mem_ref instead.  */
4012       type = build_pointer_type (TREE_TYPE (*use->op_p));
4013       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4014                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4015     }
4016   else
4017     type = base_type;
4018
4019   return type;
4020 }
4021
4022 /* Determines the expression by that USE is expressed from induction variable
4023    CAND at statement AT in LOOP.  The computation is unshared.  */
4024
4025 static tree
4026 get_computation_at (struct loop *loop, gimple *at,
4027                     struct iv_use *use, struct iv_cand *cand)
4028 {
4029   aff_tree aff;
4030   tree type = get_use_type (use);
4031
4032   if (!get_computation_aff (loop, at, use, cand, &aff))
4033     return NULL_TREE;
4034   unshare_aff_combination (&aff);
4035   return fold_convert (type, aff_combination_to_tree (&aff));
4036 }
4037
4038 /* Adjust the cost COST for being in loop setup rather than loop body.
4039    If we're optimizing for space, the loop setup overhead is constant;
4040    if we're optimizing for speed, amortize it over the per-iteration cost.
4041    If ROUND_UP_P is true, the result is round up rather than to zero when
4042    optimizing for speed.  */
4043 static int64_t
4044 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4045                    bool round_up_p = false)
4046 {
4047   if (cost == INFTY)
4048     return cost;
4049   else if (optimize_loop_for_speed_p (data->current_loop))
4050     {
4051       int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4052       return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4053     }
4054   else
4055     return cost;
4056 }
4057
4058 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4059    EXPR operand holding the shift.  COST0 and COST1 are the costs for
4060    calculating the operands of EXPR.  Returns true if successful, and returns
4061    the cost in COST.  */
4062
4063 static bool
4064 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4065                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4066 {
4067   comp_cost res;
4068   tree op1 = TREE_OPERAND (expr, 1);
4069   tree cst = TREE_OPERAND (mult, 1);
4070   tree multop = TREE_OPERAND (mult, 0);
4071   int m = exact_log2 (int_cst_value (cst));
4072   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4073   int as_cost, sa_cost;
4074   bool mult_in_op1;
4075
4076   if (!(m >= 0 && m < maxm))
4077     return false;
4078
4079   STRIP_NOPS (op1);
4080   mult_in_op1 = operand_equal_p (op1, mult, 0);
4081
4082   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4083
4084   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4085      use that in preference to a shift insn followed by an add insn.  */
4086   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4087              ? shiftadd_cost (speed, mode, m)
4088              : (mult_in_op1
4089                 ? shiftsub1_cost (speed, mode, m)
4090                 : shiftsub0_cost (speed, mode, m)));
4091
4092   res = comp_cost (MIN (as_cost, sa_cost), 0);
4093   res += (mult_in_op1 ? cost0 : cost1);
4094
4095   STRIP_NOPS (multop);
4096   if (!is_gimple_val (multop))
4097     res += force_expr_to_var_cost (multop, speed);
4098
4099   *cost = res;
4100   return true;
4101 }
4102
4103 /* Estimates cost of forcing expression EXPR into a variable.  */
4104
4105 static comp_cost
4106 force_expr_to_var_cost (tree expr, bool speed)
4107 {
4108   static bool costs_initialized = false;
4109   static unsigned integer_cost [2];
4110   static unsigned symbol_cost [2];
4111   static unsigned address_cost [2];
4112   tree op0, op1;
4113   comp_cost cost0, cost1, cost;
4114   machine_mode mode;
4115   scalar_int_mode int_mode;
4116
4117   if (!costs_initialized)
4118     {
4119       tree type = build_pointer_type (integer_type_node);
4120       tree var, addr;
4121       rtx x;
4122       int i;
4123
4124       var = create_tmp_var_raw (integer_type_node, "test_var");
4125       TREE_STATIC (var) = 1;
4126       x = produce_memory_decl_rtl (var, NULL);
4127       SET_DECL_RTL (var, x);
4128
4129       addr = build1 (ADDR_EXPR, type, var);
4130
4131
4132       for (i = 0; i < 2; i++)
4133         {
4134           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4135                                                              2000), i);
4136
4137           symbol_cost[i] = computation_cost (addr, i) + 1;
4138
4139           address_cost[i]
4140             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4141           if (dump_file && (dump_flags & TDF_DETAILS))
4142             {
4143               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4144               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4145               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4146               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4147               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4148               fprintf (dump_file, "\n");
4149             }
4150         }
4151
4152       costs_initialized = true;
4153     }
4154
4155   STRIP_NOPS (expr);
4156
4157   if (SSA_VAR_P (expr))
4158     return no_cost;
4159
4160   if (is_gimple_min_invariant (expr))
4161     {
4162       if (poly_int_tree_p (expr))
4163         return comp_cost (integer_cost [speed], 0);
4164
4165       if (TREE_CODE (expr) == ADDR_EXPR)
4166         {
4167           tree obj = TREE_OPERAND (expr, 0);
4168
4169           if (VAR_P (obj)
4170               || TREE_CODE (obj) == PARM_DECL
4171               || TREE_CODE (obj) == RESULT_DECL)
4172             return comp_cost (symbol_cost [speed], 0);
4173         }
4174
4175       return comp_cost (address_cost [speed], 0);
4176     }
4177
4178   switch (TREE_CODE (expr))
4179     {
4180     case POINTER_PLUS_EXPR:
4181     case PLUS_EXPR:
4182     case MINUS_EXPR:
4183     case MULT_EXPR:
4184     case TRUNC_DIV_EXPR:
4185     case BIT_AND_EXPR:
4186     case BIT_IOR_EXPR:
4187     case LSHIFT_EXPR:
4188     case RSHIFT_EXPR:
4189       op0 = TREE_OPERAND (expr, 0);
4190       op1 = TREE_OPERAND (expr, 1);
4191       STRIP_NOPS (op0);
4192       STRIP_NOPS (op1);
4193       break;
4194
4195     CASE_CONVERT:
4196     case NEGATE_EXPR:
4197     case BIT_NOT_EXPR:
4198       op0 = TREE_OPERAND (expr, 0);
4199       STRIP_NOPS (op0);
4200       op1 = NULL_TREE;
4201       break;
4202
4203     default:
4204       /* Just an arbitrary value, FIXME.  */
4205       return comp_cost (target_spill_cost[speed], 0);
4206     }
4207
4208   if (op0 == NULL_TREE
4209       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4210     cost0 = no_cost;
4211   else
4212     cost0 = force_expr_to_var_cost (op0, speed);
4213
4214   if (op1 == NULL_TREE
4215       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4216     cost1 = no_cost;
4217   else
4218     cost1 = force_expr_to_var_cost (op1, speed);
4219
4220   mode = TYPE_MODE (TREE_TYPE (expr));
4221   switch (TREE_CODE (expr))
4222     {
4223     case POINTER_PLUS_EXPR:
4224     case PLUS_EXPR:
4225     case MINUS_EXPR:
4226     case NEGATE_EXPR:
4227       cost = comp_cost (add_cost (speed, mode), 0);
4228       if (TREE_CODE (expr) != NEGATE_EXPR)
4229         {
4230           tree mult = NULL_TREE;
4231           comp_cost sa_cost;
4232           if (TREE_CODE (op1) == MULT_EXPR)
4233             mult = op1;
4234           else if (TREE_CODE (op0) == MULT_EXPR)
4235             mult = op0;
4236
4237           if (mult != NULL_TREE
4238               && is_a <scalar_int_mode> (mode, &int_mode)
4239               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4240               && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4241                                     speed, &sa_cost))
4242             return sa_cost;
4243         }
4244       break;
4245
4246     CASE_CONVERT:
4247       {
4248         tree inner_mode, outer_mode;
4249         outer_mode = TREE_TYPE (expr);
4250         inner_mode = TREE_TYPE (op0);
4251         cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4252                                        TYPE_MODE (inner_mode), speed), 0);
4253       }
4254       break;
4255
4256     case MULT_EXPR:
4257       if (cst_and_fits_in_hwi (op0))
4258         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4259                                              mode, speed), 0);
4260       else if (cst_and_fits_in_hwi (op1))
4261         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4262                                              mode, speed), 0);
4263       else
4264         return comp_cost (target_spill_cost [speed], 0);
4265       break;
4266
4267     case TRUNC_DIV_EXPR:
4268       /* Division by power of two is usually cheap, so we allow it.  Forbid
4269          anything else.  */
4270       if (integer_pow2p (TREE_OPERAND (expr, 1)))
4271         cost = comp_cost (add_cost (speed, mode), 0);
4272       else
4273         cost = comp_cost (target_spill_cost[speed], 0);
4274       break;
4275
4276     case BIT_AND_EXPR:
4277     case BIT_IOR_EXPR:
4278     case BIT_NOT_EXPR:
4279     case LSHIFT_EXPR:
4280     case RSHIFT_EXPR:
4281       cost = comp_cost (add_cost (speed, mode), 0);
4282       break;
4283
4284     default:
4285       gcc_unreachable ();
4286     }
4287
4288   cost += cost0;
4289   cost += cost1;
4290   return cost;
4291 }
4292
4293 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4294    invariants the computation depends on.  */
4295
4296 static comp_cost
4297 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4298 {
4299   if (!expr)
4300     return no_cost;
4301
4302   find_inv_vars (data, &expr, inv_vars);
4303   return force_expr_to_var_cost (expr, data->speed);
4304 }
4305
4306 /* Returns cost of auto-modifying address expression in shape base + offset.
4307    AINC_STEP is step size of the address IV.  AINC_OFFSET is offset of the
4308    address expression.  The address expression has ADDR_MODE in addr space
4309    AS.  The memory access has MEM_MODE.  SPEED means we are optimizing for
4310    speed or size.  */
4311
4312 enum ainc_type
4313 {
4314   AINC_PRE_INC,         /* Pre increment.  */
4315   AINC_PRE_DEC,         /* Pre decrement.  */
4316   AINC_POST_INC,        /* Post increment.  */
4317   AINC_POST_DEC,        /* Post decrement.  */
4318   AINC_NONE             /* Also the number of auto increment types.  */
4319 };
4320
4321 struct ainc_cost_data
4322 {
4323   int64_t costs[AINC_NONE];
4324 };
4325
4326 static comp_cost
4327 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4328                        machine_mode addr_mode, machine_mode mem_mode,
4329                        addr_space_t as, bool speed)
4330 {
4331   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4332       && !USE_STORE_PRE_DECREMENT (mem_mode)
4333       && !USE_LOAD_POST_DECREMENT (mem_mode)
4334       && !USE_STORE_POST_DECREMENT (mem_mode)
4335       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4336       && !USE_STORE_PRE_INCREMENT (mem_mode)
4337       && !USE_LOAD_POST_INCREMENT (mem_mode)
4338       && !USE_STORE_POST_INCREMENT (mem_mode))
4339     return infinite_cost;
4340
4341   static vec<ainc_cost_data *> ainc_cost_data_list;
4342   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4343   if (idx >= ainc_cost_data_list.length ())
4344     {
4345       unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4346
4347       gcc_assert (nsize > idx);
4348       ainc_cost_data_list.safe_grow_cleared (nsize);
4349     }
4350
4351   ainc_cost_data *data = ainc_cost_data_list[idx];
4352   if (data == NULL)
4353     {
4354       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4355
4356       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4357       data->costs[AINC_PRE_DEC] = INFTY;
4358       data->costs[AINC_POST_DEC] = INFTY;
4359       data->costs[AINC_PRE_INC] = INFTY;
4360       data->costs[AINC_POST_INC] = INFTY;
4361       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4362           || USE_STORE_PRE_DECREMENT (mem_mode))
4363         {
4364           rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4365
4366           if (memory_address_addr_space_p (mem_mode, addr, as))
4367             data->costs[AINC_PRE_DEC]
4368               = address_cost (addr, mem_mode, as, speed);
4369         }
4370       if (USE_LOAD_POST_DECREMENT (mem_mode)
4371           || USE_STORE_POST_DECREMENT (mem_mode))
4372         {
4373           rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4374
4375           if (memory_address_addr_space_p (mem_mode, addr, as))
4376             data->costs[AINC_POST_DEC]
4377               = address_cost (addr, mem_mode, as, speed);
4378         }
4379       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4380           || USE_STORE_PRE_INCREMENT (mem_mode))
4381         {
4382           rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4383
4384           if (memory_address_addr_space_p (mem_mode, addr, as))
4385             data->costs[AINC_PRE_INC]
4386               = address_cost (addr, mem_mode, as, speed);
4387         }
4388       if (USE_LOAD_POST_INCREMENT (mem_mode)
4389           || USE_STORE_POST_INCREMENT (mem_mode))
4390         {
4391           rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4392
4393           if (memory_address_addr_space_p (mem_mode, addr, as))
4394             data->costs[AINC_POST_INC]
4395               = address_cost (addr, mem_mode, as, speed);
4396         }
4397       ainc_cost_data_list[idx] = data;
4398     }
4399
4400   poly_int64 msize = GET_MODE_SIZE (mem_mode);
4401   if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4402     return comp_cost (data->costs[AINC_POST_INC], 0);
4403   if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4404     return comp_cost (data->costs[AINC_POST_DEC], 0);
4405   if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4406     return comp_cost (data->costs[AINC_PRE_INC], 0);
4407   if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4408     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4409
4410   return infinite_cost;
4411 }
4412
4413 /* Return cost of computing USE's address expression by using CAND.
4414    AFF_INV and AFF_VAR represent invariant and variant parts of the
4415    address expression, respectively.  If AFF_INV is simple, store
4416    the loop invariant variables which are depended by it in INV_VARS;
4417    if AFF_INV is complicated, handle it as a new invariant expression
4418    and record it in INV_EXPR.  RATIO indicates multiple times between
4419    steps of USE and CAND.  If CAN_AUTOINC is nonNULL, store boolean
4420    value to it indicating if this is an auto-increment address.  */
4421
4422 static comp_cost
4423 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4424                   struct iv_cand *cand, aff_tree *aff_inv,
4425                   aff_tree *aff_var, HOST_WIDE_INT ratio,
4426                   bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4427                   bool *can_autoinc, bool speed)
4428 {
4429   rtx addr;
4430   bool simple_inv = true;
4431   tree comp_inv = NULL_TREE, type = aff_var->type;
4432   comp_cost var_cost = no_cost, cost = no_cost;
4433   struct mem_address parts = {NULL_TREE, integer_one_node,
4434                               NULL_TREE, NULL_TREE, NULL_TREE};
4435   machine_mode addr_mode = TYPE_MODE (type);
4436   machine_mode mem_mode = TYPE_MODE (use->mem_type);
4437   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4438   /* Only true if ratio != 1.  */
4439   bool ok_with_ratio_p = false;
4440   bool ok_without_ratio_p = false;
4441
4442   if (!aff_combination_const_p (aff_inv))
4443     {
4444       parts.index = integer_one_node;
4445       /* Addressing mode "base + index".  */
4446       ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4447       if (ratio != 1)
4448         {
4449           parts.step = wide_int_to_tree (type, ratio);
4450           /* Addressing mode "base + index << scale".  */
4451           ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4452           if (!ok_with_ratio_p)
4453             parts.step = NULL_TREE;
4454         }
4455       if (ok_with_ratio_p || ok_without_ratio_p)
4456         {
4457           if (maybe_ne (aff_inv->offset, 0))
4458             {
4459               parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4460               /* Addressing mode "base + index [<< scale] + offset".  */
4461               if (!valid_mem_ref_p (mem_mode, as, &parts))
4462                 parts.offset = NULL_TREE;
4463               else
4464                 aff_inv->offset = 0;
4465             }
4466
4467           move_fixed_address_to_symbol (&parts, aff_inv);
4468           /* Base is fixed address and is moved to symbol part.  */
4469           if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4470             parts.base = NULL_TREE;
4471
4472           /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4473           if (parts.symbol != NULL_TREE
4474               && !valid_mem_ref_p (mem_mode, as, &parts))
4475             {
4476               aff_combination_add_elt (aff_inv, parts.symbol, 1);
4477               parts.symbol = NULL_TREE;
4478               /* Reset SIMPLE_INV since symbol address needs to be computed
4479                  outside of address expression in this case.  */
4480               simple_inv = false;
4481               /* Symbol part is moved back to base part, it can't be NULL.  */
4482               parts.base = integer_one_node;
4483             }
4484         }
4485       else
4486         parts.index = NULL_TREE;
4487     }
4488   else
4489     {
4490       poly_int64 ainc_step;
4491       if (can_autoinc
4492           && ratio == 1
4493           && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4494         {
4495           poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4496
4497           if (stmt_after_increment (data->current_loop, cand, use->stmt))
4498             ainc_offset += ainc_step;
4499           cost = get_address_cost_ainc (ainc_step, ainc_offset,
4500                                         addr_mode, mem_mode, as, speed);
4501           if (!cost.infinite_cost_p ())
4502             {
4503               *can_autoinc = true;
4504               return cost;
4505             }
4506           cost = no_cost;
4507         }
4508       if (!aff_combination_zero_p (aff_inv))
4509         {
4510           parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4511           /* Addressing mode "base + offset".  */
4512           if (!valid_mem_ref_p (mem_mode, as, &parts))
4513             parts.offset = NULL_TREE;
4514           else
4515             aff_inv->offset = 0;
4516         }
4517     }
4518
4519   if (simple_inv)
4520     simple_inv = (aff_inv == NULL
4521                   || aff_combination_const_p (aff_inv)
4522                   || aff_combination_singleton_var_p (aff_inv));
4523   if (!aff_combination_zero_p (aff_inv))
4524     comp_inv = aff_combination_to_tree (aff_inv);
4525   if (comp_inv != NULL_TREE)
4526     cost = force_var_cost (data, comp_inv, inv_vars);
4527   if (ratio != 1 && parts.step == NULL_TREE)
4528     var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4529   if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4530     var_cost += add_cost (speed, addr_mode);
4531
4532   if (comp_inv && inv_expr && !simple_inv)
4533     {
4534       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4535       /* Clear depends on.  */
4536       if (*inv_expr != NULL && inv_vars && *inv_vars)
4537         bitmap_clear (*inv_vars);
4538
4539       /* Cost of small invariant expression adjusted against loop niters
4540          is usually zero, which makes it difficult to be differentiated
4541          from candidate based on loop invariant variables.  Secondly, the
4542          generated invariant expression may not be hoisted out of loop by
4543          following pass.  We penalize the cost by rounding up in order to
4544          neutralize such effects.  */
4545       cost.cost = adjust_setup_cost (data, cost.cost, true);
4546       cost.scratch = cost.cost;
4547     }
4548
4549   cost += var_cost;
4550   addr = addr_for_mem_ref (&parts, as, false);
4551   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4552   cost += address_cost (addr, mem_mode, as, speed);
4553
4554   if (parts.symbol != NULL_TREE)
4555     cost.complexity += 1;
4556   /* Don't increase the complexity of adding a scaled index if it's
4557      the only kind of index that the target allows.  */
4558   if (parts.step != NULL_TREE && ok_without_ratio_p)
4559     cost.complexity += 1;
4560   if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4561     cost.complexity += 1;
4562   if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4563     cost.complexity += 1;
4564
4565   return cost;
4566 }
4567
4568 /* Scale (multiply) the computed COST (except scratch part that should be
4569    hoisted out a loop) by header->frequency / AT->frequency, which makes
4570    expected cost more accurate.  */
4571
4572 static comp_cost
4573 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4574 {
4575   if (data->speed
4576       && data->current_loop->header->count.to_frequency (cfun) > 0)
4577     {
4578       basic_block bb = gimple_bb (at);
4579       gcc_assert (cost.scratch <= cost.cost);
4580       int scale_factor = (int)(intptr_t) bb->aux;
4581       if (scale_factor == 1)
4582         return cost;
4583
4584       int64_t scaled_cost
4585         = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4586
4587       if (dump_file && (dump_flags & TDF_DETAILS))
4588         fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4589                  "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4590                  1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4591
4592       cost.cost = scaled_cost;
4593     }
4594
4595   return cost;
4596 }
4597
4598 /* Determines the cost of the computation by that USE is expressed
4599    from induction variable CAND.  If ADDRESS_P is true, we just need
4600    to create an address from it, otherwise we want to get it into
4601    register.  A set of invariants we depend on is stored in INV_VARS.
4602    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4603    addressing is likely.  If INV_EXPR is nonnull, record invariant
4604    expr entry in it.  */
4605
4606 static comp_cost
4607 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4608                       struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4609                       bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4610 {
4611   gimple *at = use->stmt;
4612   tree ubase = use->iv->base, cbase = cand->iv->base;
4613   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4614   tree comp_inv = NULL_TREE;
4615   HOST_WIDE_INT ratio, aratio;
4616   comp_cost cost;
4617   widest_int rat;
4618   aff_tree aff_inv, aff_var;
4619   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4620
4621   if (inv_vars)
4622     *inv_vars = NULL;
4623   if (can_autoinc)
4624     *can_autoinc = false;
4625   if (inv_expr)
4626     *inv_expr = NULL;
4627
4628   /* Check if we have enough precision to express the values of use.  */
4629   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4630     return infinite_cost;
4631
4632   if (address_p
4633       || (use->iv->base_object
4634           && cand->iv->base_object
4635           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4636           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4637     {
4638       /* Do not try to express address of an object with computation based
4639          on address of a different object.  This may cause problems in rtl
4640          level alias analysis (that does not expect this to be happening,
4641          as this is illegal in C), and would be unlikely to be useful
4642          anyway.  */
4643       if (use->iv->base_object
4644           && cand->iv->base_object
4645           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4646         return infinite_cost;
4647     }
4648
4649   if (!get_computation_aff_1 (data->current_loop, at, use,
4650                               cand, &aff_inv, &aff_var, &rat)
4651       || !wi::fits_shwi_p (rat))
4652     return infinite_cost;
4653
4654   ratio = rat.to_shwi ();
4655   if (address_p)
4656     {
4657       cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4658                                inv_vars, inv_expr, can_autoinc, speed);
4659       return get_scaled_computation_cost_at (data, at, cost);
4660     }
4661
4662   bool simple_inv = (aff_combination_const_p (&aff_inv)
4663                      || aff_combination_singleton_var_p (&aff_inv));
4664   tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4665   aff_combination_convert (&aff_inv, signed_type);
4666   if (!aff_combination_zero_p (&aff_inv))
4667     comp_inv = aff_combination_to_tree (&aff_inv);
4668
4669   cost = force_var_cost (data, comp_inv, inv_vars);
4670   if (comp_inv && inv_expr && !simple_inv)
4671     {
4672       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4673       /* Clear depends on.  */
4674       if (*inv_expr != NULL && inv_vars && *inv_vars)
4675         bitmap_clear (*inv_vars);
4676
4677       cost.cost = adjust_setup_cost (data, cost.cost);
4678       /* Record setup cost in scratch field.  */
4679       cost.scratch = cost.cost;
4680     }
4681   /* Cost of constant integer can be covered when adding invariant part to
4682      variant part.  */
4683   else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4684     cost = no_cost;
4685
4686   /* Need type narrowing to represent use with cand.  */
4687   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4688     {
4689       machine_mode outer_mode = TYPE_MODE (utype);
4690       machine_mode inner_mode = TYPE_MODE (ctype);
4691       cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4692     }
4693
4694   /* Turn a + i * (-c) into a - i * c.  */
4695   if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4696     aratio = -ratio;
4697   else
4698     aratio = ratio;
4699
4700   if (ratio != 1)
4701     cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4702
4703   /* TODO: We may also need to check if we can compute  a + i * 4 in one
4704      instruction.  */
4705   /* Need to add up the invariant and variant parts.  */
4706   if (comp_inv && !integer_zerop (comp_inv))
4707     cost += add_cost (speed, TYPE_MODE (utype));
4708
4709   return get_scaled_computation_cost_at (data, at, cost);
4710 }
4711
4712 /* Determines cost of computing the use in GROUP with CAND in a generic
4713    expression.  */
4714
4715 static bool
4716 determine_group_iv_cost_generic (struct ivopts_data *data,
4717                                  struct iv_group *group, struct iv_cand *cand)
4718 {
4719   comp_cost cost;
4720   iv_inv_expr_ent *inv_expr = NULL;
4721   bitmap inv_vars = NULL, inv_exprs = NULL;
4722   struct iv_use *use = group->vuses[0];
4723
4724   /* The simple case first -- if we need to express value of the preserved
4725      original biv, the cost is 0.  This also prevents us from counting the
4726      cost of increment twice -- once at this use and once in the cost of
4727      the candidate.  */
4728   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4729     cost = no_cost;
4730   else
4731     cost = get_computation_cost (data, use, cand, false,
4732                                  &inv_vars, NULL, &inv_expr);
4733
4734   if (inv_expr)
4735     {
4736       inv_exprs = BITMAP_ALLOC (NULL);
4737       bitmap_set_bit (inv_exprs, inv_expr->id);
4738     }
4739   set_group_iv_cost (data, group, cand, cost, inv_vars,
4740                      NULL_TREE, ERROR_MARK, inv_exprs);
4741   return !cost.infinite_cost_p ();
4742 }
4743
4744 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
4745
4746 static bool
4747 determine_group_iv_cost_address (struct ivopts_data *data,
4748                                  struct iv_group *group, struct iv_cand *cand)
4749 {
4750   unsigned i;
4751   bitmap inv_vars = NULL, inv_exprs = NULL;
4752   bool can_autoinc;
4753   iv_inv_expr_ent *inv_expr = NULL;
4754   struct iv_use *use = group->vuses[0];
4755   comp_cost sum_cost = no_cost, cost;
4756
4757   cost = get_computation_cost (data, use, cand, true,
4758                                &inv_vars, &can_autoinc, &inv_expr);
4759
4760   if (inv_expr)
4761     {
4762       inv_exprs = BITMAP_ALLOC (NULL);
4763       bitmap_set_bit (inv_exprs, inv_expr->id);
4764     }
4765   sum_cost = cost;
4766   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4767     {
4768       if (can_autoinc)
4769         sum_cost -= cand->cost_step;
4770       /* If we generated the candidate solely for exploiting autoincrement
4771          opportunities, and it turns out it can't be used, set the cost to
4772          infinity to make sure we ignore it.  */
4773       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4774         sum_cost = infinite_cost;
4775     }
4776
4777   /* Uses in a group can share setup code, so only add setup cost once.  */
4778   cost -= cost.scratch;
4779   /* Compute and add costs for rest uses of this group.  */
4780   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4781     {
4782       struct iv_use *next = group->vuses[i];
4783
4784       /* TODO: We could skip computing cost for sub iv_use when it has the
4785          same cost as the first iv_use, but the cost really depends on the
4786          offset and where the iv_use is.  */
4787         cost = get_computation_cost (data, next, cand, true,
4788                                      NULL, &can_autoinc, &inv_expr);
4789         if (inv_expr)
4790           {
4791             if (!inv_exprs)
4792               inv_exprs = BITMAP_ALLOC (NULL);
4793
4794             bitmap_set_bit (inv_exprs, inv_expr->id);
4795           }
4796       sum_cost += cost;
4797     }
4798   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4799                      NULL_TREE, ERROR_MARK, inv_exprs);
4800
4801   return !sum_cost.infinite_cost_p ();
4802 }
4803
4804 /* Computes value of candidate CAND at position AT in iteration NITER, and
4805    stores it to VAL.  */
4806
4807 static void
4808 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4809                aff_tree *val)
4810 {
4811   aff_tree step, delta, nit;
4812   struct iv *iv = cand->iv;
4813   tree type = TREE_TYPE (iv->base);
4814   tree steptype;
4815   if (POINTER_TYPE_P (type))
4816     steptype = sizetype;
4817   else
4818     steptype = unsigned_type_for (type);
4819
4820   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4821   aff_combination_convert (&step, steptype);
4822   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4823   aff_combination_convert (&nit, steptype);
4824   aff_combination_mult (&nit, &step, &delta);
4825   if (stmt_after_increment (loop, cand, at))
4826     aff_combination_add (&delta, &step);
4827
4828   tree_to_aff_combination (iv->base, type, val);
4829   if (!POINTER_TYPE_P (type))
4830     aff_combination_convert (val, steptype);
4831   aff_combination_add (val, &delta);
4832 }
4833
4834 /* Returns period of induction variable iv.  */
4835
4836 static tree
4837 iv_period (struct iv *iv)
4838 {
4839   tree step = iv->step, period, type;
4840   tree pow2div;
4841
4842   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4843
4844   type = unsigned_type_for (TREE_TYPE (step));
4845   /* Period of the iv is lcm (step, type_range)/step -1,
4846      i.e., N*type_range/step - 1. Since type range is power
4847      of two, N == (step >> num_of_ending_zeros_binary (step),
4848      so the final result is
4849
4850        (type_range >> num_of_ending_zeros_binary (step)) - 1
4851
4852   */
4853   pow2div = num_ending_zeros (step);
4854
4855   period = build_low_bits_mask (type,
4856                                 (TYPE_PRECISION (type)
4857                                  - tree_to_uhwi (pow2div)));
4858
4859   return period;
4860 }
4861
4862 /* Returns the comparison operator used when eliminating the iv USE.  */
4863
4864 static enum tree_code
4865 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4866 {
4867   struct loop *loop = data->current_loop;
4868   basic_block ex_bb;
4869   edge exit;
4870
4871   ex_bb = gimple_bb (use->stmt);
4872   exit = EDGE_SUCC (ex_bb, 0);
4873   if (flow_bb_inside_loop_p (loop, exit->dest))
4874     exit = EDGE_SUCC (ex_bb, 1);
4875
4876   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4877 }
4878
4879 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4880    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4881    calculation is performed in non-wrapping type.
4882
4883    TODO: More generally, we could test for the situation that
4884          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4885          This would require knowing the sign of OFFSET.  */
4886
4887 static bool
4888 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4889 {
4890   enum tree_code code;
4891   tree e1, e2;
4892   aff_tree aff_e1, aff_e2, aff_offset;
4893
4894   if (!nowrap_type_p (TREE_TYPE (base)))
4895     return false;
4896
4897   base = expand_simple_operations (base);
4898
4899   if (TREE_CODE (base) == SSA_NAME)
4900     {
4901       gimple *stmt = SSA_NAME_DEF_STMT (base);
4902
4903       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4904         return false;
4905
4906       code = gimple_assign_rhs_code (stmt);
4907       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4908         return false;
4909
4910       e1 = gimple_assign_rhs1 (stmt);
4911       e2 = gimple_assign_rhs2 (stmt);
4912     }
4913   else
4914     {
4915       code = TREE_CODE (base);
4916       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4917         return false;
4918       e1 = TREE_OPERAND (base, 0);
4919       e2 = TREE_OPERAND (base, 1);
4920     }
4921
4922   /* Use affine expansion as deeper inspection to prove the equality.  */
4923   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4924                                   &aff_e2, &data->name_expansion_cache);
4925   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4926                                   &aff_offset, &data->name_expansion_cache);
4927   aff_combination_scale (&aff_offset, -1);
4928   switch (code)
4929     {
4930     case PLUS_EXPR:
4931       aff_combination_add (&aff_e2, &aff_offset);
4932       if (aff_combination_zero_p (&aff_e2))
4933         return true;
4934
4935       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4936                                       &aff_e1, &data->name_expansion_cache);
4937       aff_combination_add (&aff_e1, &aff_offset);
4938       return aff_combination_zero_p (&aff_e1);
4939
4940     case POINTER_PLUS_EXPR:
4941       aff_combination_add (&aff_e2, &aff_offset);
4942       return aff_combination_zero_p (&aff_e2);
4943
4944     default:
4945       return false;
4946     }
4947 }
4948
4949 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4950    comparison with CAND.  NITER describes the number of iterations of
4951    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4952
4953    We aim to handle the following situation:
4954
4955    sometype *base, *p;
4956    int a, b, i;
4957
4958    i = a;
4959    p = p_0 = base + a;
4960
4961    do
4962      {
4963        bla (*p);
4964        p++;
4965        i++;
4966      }
4967    while (i < b);
4968
4969    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4970    We aim to optimize this to
4971
4972    p = p_0 = base + a;
4973    do
4974      {
4975        bla (*p);
4976        p++;
4977      }
4978    while (p < p_0 - a + b);
4979
4980    This preserves the correctness, since the pointer arithmetics does not
4981    overflow.  More precisely:
4982
4983    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4984       overflow in computing it or the values of p.
4985    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4986       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4987
4988 static bool
4989 iv_elimination_compare_lt (struct ivopts_data *data,
4990                            struct iv_cand *cand, enum tree_code *comp_p,
4991                            struct tree_niter_desc *niter)
4992 {
4993   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4994   struct aff_tree nit, tmpa, tmpb;
4995   enum tree_code comp;
4996   HOST_WIDE_INT step;
4997
4998   /* We need to know that the candidate induction variable does not overflow.
4999      While more complex analysis may be used to prove this, for now just
5000      check that the variable appears in the original program and that it
5001      is computed in a type that guarantees no overflows.  */
5002   cand_type = TREE_TYPE (cand->iv->base);
5003   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5004     return false;
5005
5006   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5007      the calculation of the BOUND could overflow, making the comparison
5008      invalid.  */
5009   if (!data->loop_single_exit_p)
5010     return false;
5011
5012   /* We need to be able to decide whether candidate is increasing or decreasing
5013      in order to choose the right comparison operator.  */
5014   if (!cst_and_fits_in_hwi (cand->iv->step))
5015     return false;
5016   step = int_cst_value (cand->iv->step);
5017
5018   /* Check that the number of iterations matches the expected pattern:
5019      a + 1 > b ? 0 : b - a - 1.  */
5020   mbz = niter->may_be_zero;
5021   if (TREE_CODE (mbz) == GT_EXPR)
5022     {
5023       /* Handle a + 1 > b.  */
5024       tree op0 = TREE_OPERAND (mbz, 0);
5025       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5026         {
5027           a = TREE_OPERAND (op0, 0);
5028           b = TREE_OPERAND (mbz, 1);
5029         }
5030       else
5031         return false;
5032     }
5033   else if (TREE_CODE (mbz) == LT_EXPR)
5034     {
5035       tree op1 = TREE_OPERAND (mbz, 1);
5036
5037       /* Handle b < a + 1.  */
5038       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5039         {
5040           a = TREE_OPERAND (op1, 0);
5041           b = TREE_OPERAND (mbz, 0);
5042         }
5043       else
5044         return false;
5045     }
5046   else
5047     return false;
5048
5049   /* Expected number of iterations is B - A - 1.  Check that it matches
5050      the actual number, i.e., that B - A - NITER = 1.  */
5051   tree_to_aff_combination (niter->niter, nit_type, &nit);
5052   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5053   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5054   aff_combination_scale (&nit, -1);
5055   aff_combination_scale (&tmpa, -1);
5056   aff_combination_add (&tmpb, &tmpa);
5057   aff_combination_add (&tmpb, &nit);
5058   if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5059     return false;
5060
5061   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5062      overflow.  */
5063   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5064                         cand->iv->step,
5065                         fold_convert (TREE_TYPE (cand->iv->step), a));
5066   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5067     return false;
5068
5069   /* Determine the new comparison operator.  */
5070   comp = step < 0 ? GT_EXPR : LT_EXPR;
5071   if (*comp_p == NE_EXPR)
5072     *comp_p = comp;
5073   else if (*comp_p == EQ_EXPR)
5074     *comp_p = invert_tree_comparison (comp, false);
5075   else
5076     gcc_unreachable ();
5077
5078   return true;
5079 }
5080
5081 /* Check whether it is possible to express the condition in USE by comparison
5082    of candidate CAND.  If so, store the value compared with to BOUND, and the
5083    comparison operator to COMP.  */
5084
5085 static bool
5086 may_eliminate_iv (struct ivopts_data *data,
5087                   struct iv_use *use, struct iv_cand *cand, tree *bound,
5088                   enum tree_code *comp)
5089 {
5090   basic_block ex_bb;
5091   edge exit;
5092   tree period;
5093   struct loop *loop = data->current_loop;
5094   aff_tree bnd;
5095   struct tree_niter_desc *desc = NULL;
5096
5097   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5098     return false;
5099
5100   /* For now works only for exits that dominate the loop latch.
5101      TODO: extend to other conditions inside loop body.  */
5102   ex_bb = gimple_bb (use->stmt);
5103   if (use->stmt != last_stmt (ex_bb)
5104       || gimple_code (use->stmt) != GIMPLE_COND
5105       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5106     return false;
5107
5108   exit = EDGE_SUCC (ex_bb, 0);
5109   if (flow_bb_inside_loop_p (loop, exit->dest))
5110     exit = EDGE_SUCC (ex_bb, 1);
5111   if (flow_bb_inside_loop_p (loop, exit->dest))
5112     return false;
5113
5114   desc = niter_for_exit (data, exit);
5115   if (!desc)
5116     return false;
5117
5118   /* Determine whether we can use the variable to test the exit condition.
5119      This is the case iff the period of the induction variable is greater
5120      than the number of iterations for which the exit condition is true.  */
5121   period = iv_period (cand->iv);
5122
5123   /* If the number of iterations is constant, compare against it directly.  */
5124   if (TREE_CODE (desc->niter) == INTEGER_CST)
5125     {
5126       /* See cand_value_at.  */
5127       if (stmt_after_increment (loop, cand, use->stmt))
5128         {
5129           if (!tree_int_cst_lt (desc->niter, period))
5130             return false;
5131         }
5132       else
5133         {
5134           if (tree_int_cst_lt (period, desc->niter))
5135             return false;
5136         }
5137     }
5138
5139   /* If not, and if this is the only possible exit of the loop, see whether
5140      we can get a conservative estimate on the number of iterations of the
5141      entire loop and compare against that instead.  */
5142   else
5143     {
5144       widest_int period_value, max_niter;
5145
5146       max_niter = desc->max;
5147       if (stmt_after_increment (loop, cand, use->stmt))
5148         max_niter += 1;
5149       period_value = wi::to_widest (period);
5150       if (wi::gtu_p (max_niter, period_value))
5151         {
5152           /* See if we can take advantage of inferred loop bound
5153              information.  */
5154           if (data->loop_single_exit_p)
5155             {
5156               if (!max_loop_iterations (loop, &max_niter))
5157                 return false;
5158               /* The loop bound is already adjusted by adding 1.  */
5159               if (wi::gtu_p (max_niter, period_value))
5160                 return false;
5161             }
5162           else
5163             return false;
5164         }
5165     }
5166
5167   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5168
5169   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5170                          aff_combination_to_tree (&bnd));
5171   *comp = iv_elimination_compare (data, use);
5172
5173   /* It is unlikely that computing the number of iterations using division
5174      would be more profitable than keeping the original induction variable.  */
5175   if (expression_expensive_p (*bound))
5176     return false;
5177
5178   /* Sometimes, it is possible to handle the situation that the number of
5179      iterations may be zero unless additional assumptions by using <
5180      instead of != in the exit condition.
5181
5182      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5183            base the exit condition on it.  However, that is often too
5184            expensive.  */
5185   if (!integer_zerop (desc->may_be_zero))
5186     return iv_elimination_compare_lt (data, cand, comp, desc);
5187
5188   return true;
5189 }
5190
5191  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5192     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5193
5194 static int
5195 parm_decl_cost (struct ivopts_data *data, tree bound)
5196 {
5197   tree sbound = bound;
5198   STRIP_NOPS (sbound);
5199
5200   if (TREE_CODE (sbound) == SSA_NAME
5201       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5202       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5203       && data->body_includes_call)
5204     return COSTS_N_INSNS (1);
5205
5206   return 0;
5207 }
5208
5209 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5210
5211 static bool
5212 determine_group_iv_cost_cond (struct ivopts_data *data,
5213                               struct iv_group *group, struct iv_cand *cand)
5214 {
5215   tree bound = NULL_TREE;
5216   struct iv *cmp_iv;
5217   bitmap inv_exprs = NULL;
5218   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5219   comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5220   enum comp_iv_rewrite rewrite_type;
5221   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5222   tree *control_var, *bound_cst;
5223   enum tree_code comp = ERROR_MARK;
5224   struct iv_use *use = group->vuses[0];
5225
5226   /* Extract condition operands.  */
5227   rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5228                                         &bound_cst, NULL, &cmp_iv);
5229   gcc_assert (rewrite_type != COMP_IV_NA);
5230
5231   /* Try iv elimination.  */
5232   if (rewrite_type == COMP_IV_ELIM
5233       && may_eliminate_iv (data, use, cand, &bound, &comp))
5234     {
5235       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5236       if (elim_cost.cost == 0)
5237         elim_cost.cost = parm_decl_cost (data, bound);
5238       else if (TREE_CODE (bound) == INTEGER_CST)
5239         elim_cost.cost = 0;
5240       /* If we replace a loop condition 'i < n' with 'p < base + n',
5241          inv_vars_elim will have 'base' and 'n' set, which implies that both
5242          'base' and 'n' will be live during the loop.    More likely,
5243          'base + n' will be loop invariant, resulting in only one live value
5244          during the loop.  So in that case we clear inv_vars_elim and set
5245          inv_expr_elim instead.  */
5246       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5247         {
5248           inv_expr_elim = get_loop_invariant_expr (data, bound);
5249           bitmap_clear (inv_vars_elim);
5250         }
5251       /* The bound is a loop invariant, so it will be only computed
5252          once.  */
5253       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5254     }
5255
5256   /* When the condition is a comparison of the candidate IV against
5257      zero, prefer this IV.
5258
5259      TODO: The constant that we're subtracting from the cost should
5260      be target-dependent.  This information should be added to the
5261      target costs for each backend.  */
5262   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5263       && integer_zerop (*bound_cst)
5264       && (operand_equal_p (*control_var, cand->var_after, 0)
5265           || operand_equal_p (*control_var, cand->var_before, 0)))
5266     elim_cost -= 1;
5267
5268   express_cost = get_computation_cost (data, use, cand, false,
5269                                        &inv_vars_express, NULL,
5270                                        &inv_expr_express);
5271   if (cmp_iv != NULL)
5272     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5273
5274   /* Count the cost of the original bound as well.  */
5275   bound_cost = force_var_cost (data, *bound_cst, NULL);
5276   if (bound_cost.cost == 0)
5277     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5278   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5279     bound_cost.cost = 0;
5280   express_cost += bound_cost;
5281
5282   /* Choose the better approach, preferring the eliminated IV. */
5283   if (elim_cost <= express_cost)
5284     {
5285       cost = elim_cost;
5286       inv_vars = inv_vars_elim;
5287       inv_vars_elim = NULL;
5288       inv_expr = inv_expr_elim;
5289     }
5290   else
5291     {
5292       cost = express_cost;
5293       inv_vars = inv_vars_express;
5294       inv_vars_express = NULL;
5295       bound = NULL_TREE;
5296       comp = ERROR_MARK;
5297       inv_expr = inv_expr_express;
5298     }
5299
5300   if (inv_expr)
5301     {
5302       inv_exprs = BITMAP_ALLOC (NULL);
5303       bitmap_set_bit (inv_exprs, inv_expr->id);
5304     }
5305   set_group_iv_cost (data, group, cand, cost,
5306                      inv_vars, bound, comp, inv_exprs);
5307
5308   if (inv_vars_elim)
5309     BITMAP_FREE (inv_vars_elim);
5310   if (inv_vars_express)
5311     BITMAP_FREE (inv_vars_express);
5312
5313   return !cost.infinite_cost_p ();
5314 }
5315
5316 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5317    if USE cannot be represented with CAND.  */
5318
5319 static bool
5320 determine_group_iv_cost (struct ivopts_data *data,
5321                          struct iv_group *group, struct iv_cand *cand)
5322 {
5323   switch (group->type)
5324     {
5325     case USE_NONLINEAR_EXPR:
5326       return determine_group_iv_cost_generic (data, group, cand);
5327
5328     case USE_REF_ADDRESS:
5329     case USE_PTR_ADDRESS:
5330       return determine_group_iv_cost_address (data, group, cand);
5331
5332     case USE_COMPARE:
5333       return determine_group_iv_cost_cond (data, group, cand);
5334
5335     default:
5336       gcc_unreachable ();
5337     }
5338 }
5339
5340 /* Return true if get_computation_cost indicates that autoincrement is
5341    a possibility for the pair of USE and CAND, false otherwise.  */
5342
5343 static bool
5344 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5345                            struct iv_cand *cand)
5346 {
5347   if (!address_p (use->type))
5348     return false;
5349
5350   bool can_autoinc = false;
5351   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5352   return can_autoinc;
5353 }
5354
5355 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5356    use that allows autoincrement, and set their AINC_USE if possible.  */
5357
5358 static void
5359 set_autoinc_for_original_candidates (struct ivopts_data *data)
5360 {
5361   unsigned i, j;
5362
5363   for (i = 0; i < data->vcands.length (); i++)
5364     {
5365       struct iv_cand *cand = data->vcands[i];
5366       struct iv_use *closest_before = NULL;
5367       struct iv_use *closest_after = NULL;
5368       if (cand->pos != IP_ORIGINAL)
5369         continue;
5370
5371       for (j = 0; j < data->vgroups.length (); j++)
5372         {
5373           struct iv_group *group = data->vgroups[j];
5374           struct iv_use *use = group->vuses[0];
5375           unsigned uid = gimple_uid (use->stmt);
5376
5377           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5378             continue;
5379
5380           if (uid < gimple_uid (cand->incremented_at)
5381               && (closest_before == NULL
5382                   || uid > gimple_uid (closest_before->stmt)))
5383             closest_before = use;
5384
5385           if (uid > gimple_uid (cand->incremented_at)
5386               && (closest_after == NULL
5387                   || uid < gimple_uid (closest_after->stmt)))
5388             closest_after = use;
5389         }
5390
5391       if (closest_before != NULL
5392           && autoinc_possible_for_pair (data, closest_before, cand))
5393         cand->ainc_use = closest_before;
5394       else if (closest_after != NULL
5395                && autoinc_possible_for_pair (data, closest_after, cand))
5396         cand->ainc_use = closest_after;
5397     }
5398 }
5399
5400 /* Relate compare use with all candidates.  */
5401
5402 static void
5403 relate_compare_use_with_all_cands (struct ivopts_data *data)
5404 {
5405   unsigned i, count = data->vcands.length ();
5406   for (i = 0; i < data->vgroups.length (); i++)
5407     {
5408       struct iv_group *group = data->vgroups[i];
5409
5410       if (group->type == USE_COMPARE)
5411         bitmap_set_range (group->related_cands, 0, count);
5412     }
5413 }
5414
5415 /* Finds the candidates for the induction variables.  */
5416
5417 static void
5418 find_iv_candidates (struct ivopts_data *data)
5419 {
5420   /* Add commonly used ivs.  */
5421   add_standard_iv_candidates (data);
5422
5423   /* Add old induction variables.  */
5424   add_iv_candidate_for_bivs (data);
5425
5426   /* Add induction variables derived from uses.  */
5427   add_iv_candidate_for_groups (data);
5428
5429   set_autoinc_for_original_candidates (data);
5430
5431   /* Record the important candidates.  */
5432   record_important_candidates (data);
5433
5434   /* Relate compare iv_use with all candidates.  */
5435   if (!data->consider_all_candidates)
5436     relate_compare_use_with_all_cands (data);
5437
5438   if (dump_file && (dump_flags & TDF_DETAILS))
5439     {
5440       unsigned i;
5441
5442       fprintf (dump_file, "\n<Important Candidates>:\t");
5443       for (i = 0; i < data->vcands.length (); i++)
5444         if (data->vcands[i]->important)
5445           fprintf (dump_file, " %d,", data->vcands[i]->id);
5446       fprintf (dump_file, "\n");
5447
5448       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5449       for (i = 0; i < data->vgroups.length (); i++)
5450         {
5451           struct iv_group *group = data->vgroups[i];
5452
5453           if (group->related_cands)
5454             {
5455               fprintf (dump_file, "  Group %d:\t", group->id);
5456               dump_bitmap (dump_file, group->related_cands);
5457             }
5458         }
5459       fprintf (dump_file, "\n");
5460     }
5461 }
5462
5463 /* Determines costs of computing use of iv with an iv candidate.  */
5464
5465 static void
5466 determine_group_iv_costs (struct ivopts_data *data)
5467 {
5468   unsigned i, j;
5469   struct iv_cand *cand;
5470   struct iv_group *group;
5471   bitmap to_clear = BITMAP_ALLOC (NULL);
5472
5473   alloc_use_cost_map (data);
5474
5475   for (i = 0; i < data->vgroups.length (); i++)
5476     {
5477       group = data->vgroups[i];
5478
5479       if (data->consider_all_candidates)
5480         {
5481           for (j = 0; j < data->vcands.length (); j++)
5482             {
5483               cand = data->vcands[j];
5484               determine_group_iv_cost (data, group, cand);
5485             }
5486         }
5487       else
5488         {
5489           bitmap_iterator bi;
5490
5491           EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5492             {
5493               cand = data->vcands[j];
5494               if (!determine_group_iv_cost (data, group, cand))
5495                 bitmap_set_bit (to_clear, j);
5496             }
5497
5498           /* Remove the candidates for that the cost is infinite from
5499              the list of related candidates.  */
5500           bitmap_and_compl_into (group->related_cands, to_clear);
5501           bitmap_clear (to_clear);
5502         }
5503     }
5504
5505   BITMAP_FREE (to_clear);
5506
5507   if (dump_file && (dump_flags & TDF_DETAILS))
5508     {
5509       bitmap_iterator bi;
5510
5511       /* Dump invariant variables.  */
5512       fprintf (dump_file, "\n<Invariant Vars>:\n");
5513       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5514         {
5515           struct version_info *info = ver_info (data, i);
5516           if (info->inv_id)
5517             {
5518               fprintf (dump_file, "Inv %d:\t", info->inv_id);
5519               print_generic_expr (dump_file, info->name, TDF_SLIM);
5520               fprintf (dump_file, "%s\n",
5521                        info->has_nonlin_use ? "" : "\t(eliminable)");
5522             }
5523         }
5524
5525       /* Dump invariant expressions.  */
5526       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5527       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5528
5529       for (hash_table<iv_inv_expr_hasher>::iterator it
5530            = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5531            ++it)
5532         list.safe_push (*it);
5533
5534       list.qsort (sort_iv_inv_expr_ent);
5535
5536       for (i = 0; i < list.length (); ++i)
5537         {
5538           fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5539           print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5540           fprintf (dump_file, "\n");
5541         }
5542
5543       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5544
5545       for (i = 0; i < data->vgroups.length (); i++)
5546         {
5547           group = data->vgroups[i];
5548
5549           fprintf (dump_file, "Group %d:\n", i);
5550           fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5551           for (j = 0; j < group->n_map_members; j++)
5552             {
5553               if (!group->cost_map[j].cand
5554                   || group->cost_map[j].cost.infinite_cost_p ())
5555                 continue;
5556
5557               fprintf (dump_file, "  %d\t%" PRId64 "\t%d\t",
5558                        group->cost_map[j].cand->id,
5559                        group->cost_map[j].cost.cost,
5560                        group->cost_map[j].cost.complexity);
5561               if (!group->cost_map[j].inv_exprs
5562                   || bitmap_empty_p (group->cost_map[j].inv_exprs))
5563                 fprintf (dump_file, "NIL;\t");
5564               else
5565                 bitmap_print (dump_file,
5566                               group->cost_map[j].inv_exprs, "", ";\t");
5567               if (!group->cost_map[j].inv_vars
5568                   || bitmap_empty_p (group->cost_map[j].inv_vars))
5569                 fprintf (dump_file, "NIL;\n");
5570               else
5571                 bitmap_print (dump_file,
5572                               group->cost_map[j].inv_vars, "", "\n");
5573             }
5574
5575           fprintf (dump_file, "\n");
5576         }
5577       fprintf (dump_file, "\n");
5578     }
5579 }
5580
5581 /* Determines cost of the candidate CAND.  */
5582
5583 static void
5584 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5585 {
5586   comp_cost cost_base;
5587   int64_t cost, cost_step;
5588   tree base;
5589
5590   gcc_assert (cand->iv != NULL);
5591
5592   /* There are two costs associated with the candidate -- its increment
5593      and its initialization.  The second is almost negligible for any loop
5594      that rolls enough, so we take it just very little into account.  */
5595
5596   base = cand->iv->base;
5597   cost_base = force_var_cost (data, base, NULL);
5598   /* It will be exceptional that the iv register happens to be initialized with
5599      the proper value at no cost.  In general, there will at least be a regcopy
5600      or a const set.  */
5601   if (cost_base.cost == 0)
5602     cost_base.cost = COSTS_N_INSNS (1);
5603   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5604
5605   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5606
5607   /* Prefer the original ivs unless we may gain something by replacing it.
5608      The reason is to make debugging simpler; so this is not relevant for
5609      artificial ivs created by other optimization passes.  */
5610   if (cand->pos != IP_ORIGINAL
5611       || !SSA_NAME_VAR (cand->var_before)
5612       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5613     cost++;
5614
5615   /* Prefer not to insert statements into latch unless there are some
5616      already (so that we do not create unnecessary jumps).  */
5617   if (cand->pos == IP_END
5618       && empty_block_p (ip_end_pos (data->current_loop)))
5619     cost++;
5620
5621   cand->cost = cost;
5622   cand->cost_step = cost_step;
5623 }
5624
5625 /* Determines costs of computation of the candidates.  */
5626
5627 static void
5628 determine_iv_costs (struct ivopts_data *data)
5629 {
5630   unsigned i;
5631
5632   if (dump_file && (dump_flags & TDF_DETAILS))
5633     {
5634       fprintf (dump_file, "<Candidate Costs>:\n");
5635       fprintf (dump_file, "  cand\tcost\n");
5636     }
5637
5638   for (i = 0; i < data->vcands.length (); i++)
5639     {
5640       struct iv_cand *cand = data->vcands[i];
5641
5642       determine_iv_cost (data, cand);
5643
5644       if (dump_file && (dump_flags & TDF_DETAILS))
5645         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5646     }
5647
5648   if (dump_file && (dump_flags & TDF_DETAILS))
5649     fprintf (dump_file, "\n");
5650 }
5651
5652 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5653    induction variables.  Note N_INVS includes both invariant variables and
5654    invariant expressions.  */
5655
5656 static unsigned
5657 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5658                               unsigned n_cands)
5659 {
5660   unsigned cost;
5661   unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5662   unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5663   bool speed = data->speed;
5664
5665   /* If there is a call in the loop body, the call-clobbered registers
5666      are not available for loop invariants.  */
5667   if (data->body_includes_call)
5668     available_regs = available_regs - target_clobbered_regs;
5669
5670   /* If we have enough registers.  */
5671   if (regs_needed + target_res_regs < available_regs)
5672     cost = n_new;
5673   /* If close to running out of registers, try to preserve them.  */
5674   else if (regs_needed <= available_regs)
5675     cost = target_reg_cost [speed] * regs_needed;
5676   /* If we run out of available registers but the number of candidates
5677      does not, we penalize extra registers using target_spill_cost.  */
5678   else if (n_cands <= available_regs)
5679     cost = target_reg_cost [speed] * available_regs
5680            + target_spill_cost [speed] * (regs_needed - available_regs);
5681   /* If the number of candidates runs out available registers, we penalize
5682      extra candidate registers using target_spill_cost * 2.  Because it is
5683      more expensive to spill induction variable than invariant.  */
5684   else
5685     cost = target_reg_cost [speed] * available_regs
5686            + target_spill_cost [speed] * (n_cands - available_regs) * 2
5687            + target_spill_cost [speed] * (regs_needed - n_cands);
5688
5689   /* Finally, add the number of candidates, so that we prefer eliminating
5690      induction variables if possible.  */
5691   return cost + n_cands;
5692 }
5693
5694 /* For each size of the induction variable set determine the penalty.  */
5695
5696 static void
5697 determine_set_costs (struct ivopts_data *data)
5698 {
5699   unsigned j, n;
5700   gphi *phi;
5701   gphi_iterator psi;
5702   tree op;
5703   struct loop *loop = data->current_loop;
5704   bitmap_iterator bi;
5705
5706   if (dump_file && (dump_flags & TDF_DETAILS))
5707     {
5708       fprintf (dump_file, "<Global Costs>:\n");
5709       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5710       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5711       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5712       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5713     }
5714
5715   n = 0;
5716   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5717     {
5718       phi = psi.phi ();
5719       op = PHI_RESULT (phi);
5720
5721       if (virtual_operand_p (op))
5722         continue;
5723
5724       if (get_iv (data, op))
5725         continue;
5726
5727       if (!POINTER_TYPE_P (TREE_TYPE (op))
5728           && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5729         continue;
5730
5731       n++;
5732     }
5733
5734   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5735     {
5736       struct version_info *info = ver_info (data, j);
5737
5738       if (info->inv_id && info->has_nonlin_use)
5739         n++;
5740     }
5741
5742   data->regs_used = n;
5743   if (dump_file && (dump_flags & TDF_DETAILS))
5744     fprintf (dump_file, "  regs_used %d\n", n);
5745
5746   if (dump_file && (dump_flags & TDF_DETAILS))
5747     {
5748       fprintf (dump_file, "  cost for size:\n");
5749       fprintf (dump_file, "  ivs\tcost\n");
5750       for (j = 0; j <= 2 * target_avail_regs; j++)
5751         fprintf (dump_file, "  %d\t%d\n", j,
5752                  ivopts_estimate_reg_pressure (data, 0, j));
5753       fprintf (dump_file, "\n");
5754     }
5755 }
5756
5757 /* Returns true if A is a cheaper cost pair than B.  */
5758
5759 static bool
5760 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5761 {
5762   if (!a)
5763     return false;
5764
5765   if (!b)
5766     return true;
5767
5768   if (a->cost < b->cost)
5769     return true;
5770
5771   if (b->cost < a->cost)
5772     return false;
5773
5774   /* In case the costs are the same, prefer the cheaper candidate.  */
5775   if (a->cand->cost < b->cand->cost)
5776     return true;
5777
5778   return false;
5779 }
5780
5781 /* Compare if A is a more expensive cost pair than B.  Return 1, 0 and -1
5782    for more expensive, equal and cheaper respectively.  */
5783
5784 static int
5785 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5786 {
5787   if (cheaper_cost_pair (a, b))
5788     return -1;
5789   if (cheaper_cost_pair (b, a))
5790     return 1;
5791
5792   return 0;
5793 }
5794
5795 /* Returns candidate by that USE is expressed in IVS.  */
5796
5797 static struct cost_pair *
5798 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5799 {
5800   return ivs->cand_for_group[group->id];
5801 }
5802
5803 /* Computes the cost field of IVS structure.  */
5804
5805 static void
5806 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5807 {
5808   comp_cost cost = ivs->cand_use_cost;
5809
5810   cost += ivs->cand_cost;
5811   cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5812   ivs->cost = cost;
5813 }
5814
5815 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
5816    and IVS.  */
5817
5818 static void
5819 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5820 {
5821   bitmap_iterator bi;
5822   unsigned iid;
5823
5824   if (!invs)
5825     return;
5826
5827   gcc_assert (n_inv_uses != NULL);
5828   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5829     {
5830       n_inv_uses[iid]--;
5831       if (n_inv_uses[iid] == 0)
5832         ivs->n_invs--;
5833     }
5834 }
5835
5836 /* Set USE not to be expressed by any candidate in IVS.  */
5837
5838 static void
5839 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5840                  struct iv_group *group)
5841 {
5842   unsigned gid = group->id, cid;
5843   struct cost_pair *cp;
5844
5845   cp = ivs->cand_for_group[gid];
5846   if (!cp)
5847     return;
5848   cid = cp->cand->id;
5849
5850   ivs->bad_groups++;
5851   ivs->cand_for_group[gid] = NULL;
5852   ivs->n_cand_uses[cid]--;
5853
5854   if (ivs->n_cand_uses[cid] == 0)
5855     {
5856       bitmap_clear_bit (ivs->cands, cid);
5857       ivs->n_cands--;
5858       ivs->cand_cost -= cp->cand->cost;
5859       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5860       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5861     }
5862
5863   ivs->cand_use_cost -= cp->cost;
5864   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5865   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5866   iv_ca_recount_cost (data, ivs);
5867 }
5868
5869 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
5870    IVS.  */
5871
5872 static void
5873 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5874 {
5875   bitmap_iterator bi;
5876   unsigned iid;
5877
5878   if (!invs)
5879     return;
5880
5881   gcc_assert (n_inv_uses != NULL);
5882   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5883     {
5884       n_inv_uses[iid]++;
5885       if (n_inv_uses[iid] == 1)
5886         ivs->n_invs++;
5887     }
5888 }
5889
5890 /* Set cost pair for GROUP in set IVS to CP.  */
5891
5892 static void
5893 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5894               struct iv_group *group, struct cost_pair *cp)
5895 {
5896   unsigned gid = group->id, cid;
5897
5898   if (ivs->cand_for_group[gid] == cp)
5899     return;
5900
5901   if (ivs->cand_for_group[gid])
5902     iv_ca_set_no_cp (data, ivs, group);
5903
5904   if (cp)
5905     {
5906       cid = cp->cand->id;
5907
5908       ivs->bad_groups--;
5909       ivs->cand_for_group[gid] = cp;
5910       ivs->n_cand_uses[cid]++;
5911       if (ivs->n_cand_uses[cid] == 1)
5912         {
5913           bitmap_set_bit (ivs->cands, cid);
5914           ivs->n_cands++;
5915           ivs->cand_cost += cp->cand->cost;
5916           iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5917           iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5918         }
5919
5920       ivs->cand_use_cost += cp->cost;
5921       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5922       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5923       iv_ca_recount_cost (data, ivs);
5924     }
5925 }
5926
5927 /* Extend set IVS by expressing USE by some of the candidates in it
5928    if possible.  Consider all important candidates if candidates in
5929    set IVS don't give any result.  */
5930
5931 static void
5932 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5933                struct iv_group *group)
5934 {
5935   struct cost_pair *best_cp = NULL, *cp;
5936   bitmap_iterator bi;
5937   unsigned i;
5938   struct iv_cand *cand;
5939
5940   gcc_assert (ivs->upto >= group->id);
5941   ivs->upto++;
5942   ivs->bad_groups++;
5943
5944   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5945     {
5946       cand = data->vcands[i];
5947       cp = get_group_iv_cost (data, group, cand);
5948       if (cheaper_cost_pair (cp, best_cp))
5949         best_cp = cp;
5950     }
5951
5952   if (best_cp == NULL)
5953     {
5954       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5955         {
5956           cand = data->vcands[i];
5957           cp = get_group_iv_cost (data, group, cand);
5958           if (cheaper_cost_pair (cp, best_cp))
5959             best_cp = cp;
5960         }
5961     }
5962
5963   iv_ca_set_cp (data, ivs, group, best_cp);
5964 }
5965
5966 /* Get cost for assignment IVS.  */
5967
5968 static comp_cost
5969 iv_ca_cost (struct iv_ca *ivs)
5970 {
5971   /* This was a conditional expression but it triggered a bug in
5972      Sun C 5.5.  */
5973   if (ivs->bad_groups)
5974     return infinite_cost;
5975   else
5976     return ivs->cost;
5977 }
5978
5979 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5980    than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
5981    respectively.  */
5982
5983 static int
5984 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5985                     struct iv_group *group, struct cost_pair *old_cp,
5986                     struct cost_pair *new_cp)
5987 {
5988   gcc_assert (old_cp && new_cp && old_cp != new_cp);
5989   unsigned old_n_invs = ivs->n_invs;
5990   iv_ca_set_cp (data, ivs, group, new_cp);
5991   unsigned new_n_invs = ivs->n_invs;
5992   iv_ca_set_cp (data, ivs, group, old_cp);
5993
5994   return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5995 }
5996
5997 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
5998    it before NEXT.  */
5999
6000 static struct iv_ca_delta *
6001 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
6002                  struct cost_pair *new_cp, struct iv_ca_delta *next)
6003 {
6004   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6005
6006   change->group = group;
6007   change->old_cp = old_cp;
6008   change->new_cp = new_cp;
6009   change->next = next;
6010
6011   return change;
6012 }
6013
6014 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6015    are rewritten.  */
6016
6017 static struct iv_ca_delta *
6018 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6019 {
6020   struct iv_ca_delta *last;
6021
6022   if (!l2)
6023     return l1;
6024
6025   if (!l1)
6026     return l2;
6027
6028   for (last = l1; last->next; last = last->next)
6029     continue;
6030   last->next = l2;
6031
6032   return l1;
6033 }
6034
6035 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6036
6037 static struct iv_ca_delta *
6038 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6039 {
6040   struct iv_ca_delta *act, *next, *prev = NULL;
6041
6042   for (act = delta; act; act = next)
6043     {
6044       next = act->next;
6045       act->next = prev;
6046       prev = act;
6047
6048       std::swap (act->old_cp, act->new_cp);
6049     }
6050
6051   return prev;
6052 }
6053
6054 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6055    reverted instead.  */
6056
6057 static void
6058 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6059                     struct iv_ca_delta *delta, bool forward)
6060 {
6061   struct cost_pair *from, *to;
6062   struct iv_ca_delta *act;
6063
6064   if (!forward)
6065     delta = iv_ca_delta_reverse (delta);
6066
6067   for (act = delta; act; act = act->next)
6068     {
6069       from = act->old_cp;
6070       to = act->new_cp;
6071       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6072       iv_ca_set_cp (data, ivs, act->group, to);
6073     }
6074
6075   if (!forward)
6076     iv_ca_delta_reverse (delta);
6077 }
6078
6079 /* Returns true if CAND is used in IVS.  */
6080
6081 static bool
6082 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6083 {
6084   return ivs->n_cand_uses[cand->id] > 0;
6085 }
6086
6087 /* Returns number of induction variable candidates in the set IVS.  */
6088
6089 static unsigned
6090 iv_ca_n_cands (struct iv_ca *ivs)
6091 {
6092   return ivs->n_cands;
6093 }
6094
6095 /* Free the list of changes DELTA.  */
6096
6097 static void
6098 iv_ca_delta_free (struct iv_ca_delta **delta)
6099 {
6100   struct iv_ca_delta *act, *next;
6101
6102   for (act = *delta; act; act = next)
6103     {
6104       next = act->next;
6105       free (act);
6106     }
6107
6108   *delta = NULL;
6109 }
6110
6111 /* Allocates new iv candidates assignment.  */
6112
6113 static struct iv_ca *
6114 iv_ca_new (struct ivopts_data *data)
6115 {
6116   struct iv_ca *nw = XNEW (struct iv_ca);
6117
6118   nw->upto = 0;
6119   nw->bad_groups = 0;
6120   nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6121                                  data->vgroups.length ());
6122   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6123   nw->cands = BITMAP_ALLOC (NULL);
6124   nw->n_cands = 0;
6125   nw->n_invs = 0;
6126   nw->cand_use_cost = no_cost;
6127   nw->cand_cost = 0;
6128   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6129   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6130   nw->cost = no_cost;
6131
6132   return nw;
6133 }
6134
6135 /* Free memory occupied by the set IVS.  */
6136
6137 static void
6138 iv_ca_free (struct iv_ca **ivs)
6139 {
6140   free ((*ivs)->cand_for_group);
6141   free ((*ivs)->n_cand_uses);
6142   BITMAP_FREE ((*ivs)->cands);
6143   free ((*ivs)->n_inv_var_uses);
6144   free ((*ivs)->n_inv_expr_uses);
6145   free (*ivs);
6146   *ivs = NULL;
6147 }
6148
6149 /* Dumps IVS to FILE.  */
6150
6151 static void
6152 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6153 {
6154   unsigned i;
6155   comp_cost cost = iv_ca_cost (ivs);
6156
6157   fprintf (file, "  cost: %" PRId64 " (complexity %d)\n", cost.cost,
6158            cost.complexity);
6159   fprintf (file, "  cand_cost: %" PRId64 "\n  cand_group_cost: "
6160            "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6161            ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6162   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6163
6164   for (i = 0; i < ivs->upto; i++)
6165     {
6166       struct iv_group *group = data->vgroups[i];
6167       struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6168       if (cp)
6169         fprintf (file, "   group:%d --> iv_cand:%d, cost=("
6170                  "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6171                  cp->cost.cost, cp->cost.complexity);
6172       else
6173         fprintf (file, "   group:%d --> ??\n", group->id);
6174     }
6175
6176   const char *pref = "";
6177   fprintf (file, "  invariant variables: ");
6178   for (i = 1; i <= data->max_inv_var_id; i++)
6179     if (ivs->n_inv_var_uses[i])
6180       {
6181         fprintf (file, "%s%d", pref, i);
6182         pref = ", ";
6183       }
6184
6185   pref = "";
6186   fprintf (file, "\n  invariant expressions: ");
6187   for (i = 1; i <= data->max_inv_expr_id; i++)
6188     if (ivs->n_inv_expr_uses[i])
6189       {
6190         fprintf (file, "%s%d", pref, i);
6191         pref = ", ";
6192       }
6193
6194   fprintf (file, "\n\n");
6195 }
6196
6197 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6198    new set, and store differences in DELTA.  Number of induction variables
6199    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6200    the function will try to find a solution with mimimal iv candidates.  */
6201
6202 static comp_cost
6203 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6204               struct iv_cand *cand, struct iv_ca_delta **delta,
6205               unsigned *n_ivs, bool min_ncand)
6206 {
6207   unsigned i;
6208   comp_cost cost;
6209   struct iv_group *group;
6210   struct cost_pair *old_cp, *new_cp;
6211
6212   *delta = NULL;
6213   for (i = 0; i < ivs->upto; i++)
6214     {
6215       group = data->vgroups[i];
6216       old_cp = iv_ca_cand_for_group (ivs, group);
6217
6218       if (old_cp
6219           && old_cp->cand == cand)
6220         continue;
6221
6222       new_cp = get_group_iv_cost (data, group, cand);
6223       if (!new_cp)
6224         continue;
6225
6226       if (!min_ncand)
6227         {
6228           int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6229           /* Skip if new_cp depends on more invariants.  */
6230           if (cmp_invs > 0)
6231             continue;
6232
6233           int cmp_cost = compare_cost_pair (new_cp, old_cp);
6234           /* Skip if new_cp is not cheaper.  */
6235           if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6236             continue;
6237         }
6238
6239       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6240     }
6241
6242   iv_ca_delta_commit (data, ivs, *delta, true);
6243   cost = iv_ca_cost (ivs);
6244   if (n_ivs)
6245     *n_ivs = iv_ca_n_cands (ivs);
6246   iv_ca_delta_commit (data, ivs, *delta, false);
6247
6248   return cost;
6249 }
6250
6251 /* Try narrowing set IVS by removing CAND.  Return the cost of
6252    the new set and store the differences in DELTA.  START is
6253    the candidate with which we start narrowing.  */
6254
6255 static comp_cost
6256 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6257               struct iv_cand *cand, struct iv_cand *start,
6258               struct iv_ca_delta **delta)
6259 {
6260   unsigned i, ci;
6261   struct iv_group *group;
6262   struct cost_pair *old_cp, *new_cp, *cp;
6263   bitmap_iterator bi;
6264   struct iv_cand *cnd;
6265   comp_cost cost, best_cost, acost;
6266
6267   *delta = NULL;
6268   for (i = 0; i < data->vgroups.length (); i++)
6269     {
6270       group = data->vgroups[i];
6271
6272       old_cp = iv_ca_cand_for_group (ivs, group);
6273       if (old_cp->cand != cand)
6274         continue;
6275
6276       best_cost = iv_ca_cost (ivs);
6277       /* Start narrowing with START.  */
6278       new_cp = get_group_iv_cost (data, group, start);
6279
6280       if (data->consider_all_candidates)
6281         {
6282           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6283             {
6284               if (ci == cand->id || (start && ci == start->id))
6285                 continue;
6286
6287               cnd = data->vcands[ci];
6288
6289               cp = get_group_iv_cost (data, group, cnd);
6290               if (!cp)
6291                 continue;
6292
6293               iv_ca_set_cp (data, ivs, group, cp);
6294               acost = iv_ca_cost (ivs);
6295
6296               if (acost < best_cost)
6297                 {
6298                   best_cost = acost;
6299                   new_cp = cp;
6300                 }
6301             }
6302         }
6303       else
6304         {
6305           EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6306             {
6307               if (ci == cand->id || (start && ci == start->id))
6308                 continue;
6309
6310               cnd = data->vcands[ci];
6311
6312               cp = get_group_iv_cost (data, group, cnd);
6313               if (!cp)
6314                 continue;
6315
6316               iv_ca_set_cp (data, ivs, group, cp);
6317               acost = iv_ca_cost (ivs);
6318
6319               if (acost < best_cost)
6320                 {
6321                   best_cost = acost;
6322                   new_cp = cp;
6323                 }
6324             }
6325         }
6326       /* Restore to old cp for use.  */
6327       iv_ca_set_cp (data, ivs, group, old_cp);
6328
6329       if (!new_cp)
6330         {
6331           iv_ca_delta_free (delta);
6332           return infinite_cost;
6333         }
6334
6335       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6336     }
6337
6338   iv_ca_delta_commit (data, ivs, *delta, true);
6339   cost = iv_ca_cost (ivs);
6340   iv_ca_delta_commit (data, ivs, *delta, false);
6341
6342   return cost;
6343 }
6344
6345 /* Try optimizing the set of candidates IVS by removing candidates different
6346    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6347    differences in DELTA.  */
6348
6349 static comp_cost
6350 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6351              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6352 {
6353   bitmap_iterator bi;
6354   struct iv_ca_delta *act_delta, *best_delta;
6355   unsigned i;
6356   comp_cost best_cost, acost;
6357   struct iv_cand *cand;
6358
6359   best_delta = NULL;
6360   best_cost = iv_ca_cost (ivs);
6361
6362   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6363     {
6364       cand = data->vcands[i];
6365
6366       if (cand == except_cand)
6367         continue;
6368
6369       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6370
6371       if (acost < best_cost)
6372         {
6373           best_cost = acost;
6374           iv_ca_delta_free (&best_delta);
6375           best_delta = act_delta;
6376         }
6377       else
6378         iv_ca_delta_free (&act_delta);
6379     }
6380
6381   if (!best_delta)
6382     {
6383       *delta = NULL;
6384       return best_cost;
6385     }
6386
6387   /* Recurse to possibly remove other unnecessary ivs.  */
6388   iv_ca_delta_commit (data, ivs, best_delta, true);
6389   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6390   iv_ca_delta_commit (data, ivs, best_delta, false);
6391   *delta = iv_ca_delta_join (best_delta, *delta);
6392   return best_cost;
6393 }
6394
6395 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6396    cheaper local cost for GROUP than BEST_CP.  Return pointer to
6397    the corresponding cost_pair, otherwise just return BEST_CP.  */
6398
6399 static struct cost_pair*
6400 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6401                         unsigned int cand_idx, struct iv_cand *old_cand,
6402                         struct cost_pair *best_cp)
6403 {
6404   struct iv_cand *cand;
6405   struct cost_pair *cp;
6406
6407   gcc_assert (old_cand != NULL && best_cp != NULL);
6408   if (cand_idx == old_cand->id)
6409     return best_cp;
6410
6411   cand = data->vcands[cand_idx];
6412   cp = get_group_iv_cost (data, group, cand);
6413   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6414     return cp;
6415
6416   return best_cp;
6417 }
6418
6419 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6420    which are used by more than one iv uses.  For each of those candidates,
6421    this function tries to represent iv uses under that candidate using
6422    other ones with lower local cost, then tries to prune the new set.
6423    If the new set has lower cost, It returns the new cost after recording
6424    candidate replacement in list DELTA.  */
6425
6426 static comp_cost
6427 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6428                struct iv_ca_delta **delta)
6429 {
6430   bitmap_iterator bi, bj;
6431   unsigned int i, j, k;
6432   struct iv_cand *cand;
6433   comp_cost orig_cost, acost;
6434   struct iv_ca_delta *act_delta, *tmp_delta;
6435   struct cost_pair *old_cp, *best_cp = NULL;
6436
6437   *delta = NULL;
6438   orig_cost = iv_ca_cost (ivs);
6439
6440   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6441     {
6442       if (ivs->n_cand_uses[i] == 1
6443           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6444         continue;
6445
6446       cand = data->vcands[i];
6447
6448       act_delta = NULL;
6449       /*  Represent uses under current candidate using other ones with
6450           lower local cost.  */
6451       for (j = 0; j < ivs->upto; j++)
6452         {
6453           struct iv_group *group = data->vgroups[j];
6454           old_cp = iv_ca_cand_for_group (ivs, group);
6455
6456           if (old_cp->cand != cand)
6457             continue;
6458
6459           best_cp = old_cp;
6460           if (data->consider_all_candidates)
6461             for (k = 0; k < data->vcands.length (); k++)
6462               best_cp = cheaper_cost_with_cand (data, group, k,
6463                                                 old_cp->cand, best_cp);
6464           else
6465             EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6466               best_cp = cheaper_cost_with_cand (data, group, k,
6467                                                 old_cp->cand, best_cp);
6468
6469           if (best_cp == old_cp)
6470             continue;
6471
6472           act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6473         }
6474       /* No need for further prune.  */
6475       if (!act_delta)
6476         continue;
6477
6478       /* Prune the new candidate set.  */
6479       iv_ca_delta_commit (data, ivs, act_delta, true);
6480       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6481       iv_ca_delta_commit (data, ivs, act_delta, false);
6482       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6483
6484       if (acost < orig_cost)
6485         {
6486           *delta = act_delta;
6487           return acost;
6488         }
6489       else
6490         iv_ca_delta_free (&act_delta);
6491     }
6492
6493   return orig_cost;
6494 }
6495
6496 /* Tries to extend the sets IVS in the best possible way in order to
6497    express the GROUP.  If ORIGINALP is true, prefer candidates from
6498    the original set of IVs, otherwise favor important candidates not
6499    based on any memory object.  */
6500
6501 static bool
6502 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6503                   struct iv_group *group, bool originalp)
6504 {
6505   comp_cost best_cost, act_cost;
6506   unsigned i;
6507   bitmap_iterator bi;
6508   struct iv_cand *cand;
6509   struct iv_ca_delta *best_delta = NULL, *act_delta;
6510   struct cost_pair *cp;
6511
6512   iv_ca_add_group (data, ivs, group);
6513   best_cost = iv_ca_cost (ivs);
6514   cp = iv_ca_cand_for_group (ivs, group);
6515   if (cp)
6516     {
6517       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6518       iv_ca_set_no_cp (data, ivs, group);
6519     }
6520
6521   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6522      first try important candidates not based on any memory object.  Only if
6523      this fails, try the specific ones.  Rationale -- in loops with many
6524      variables the best choice often is to use just one generic biv.  If we
6525      added here many ivs specific to the uses, the optimization algorithm later
6526      would be likely to get stuck in a local minimum, thus causing us to create
6527      too many ivs.  The approach from few ivs to more seems more likely to be
6528      successful -- starting from few ivs, replacing an expensive use by a
6529      specific iv should always be a win.  */
6530   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6531     {
6532       cand = data->vcands[i];
6533
6534       if (originalp && cand->pos !=IP_ORIGINAL)
6535         continue;
6536
6537       if (!originalp && cand->iv->base_object != NULL_TREE)
6538         continue;
6539
6540       if (iv_ca_cand_used_p (ivs, cand))
6541         continue;
6542
6543       cp = get_group_iv_cost (data, group, cand);
6544       if (!cp)
6545         continue;
6546
6547       iv_ca_set_cp (data, ivs, group, cp);
6548       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6549                                true);
6550       iv_ca_set_no_cp (data, ivs, group);
6551       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6552
6553       if (act_cost < best_cost)
6554         {
6555           best_cost = act_cost;
6556
6557           iv_ca_delta_free (&best_delta);
6558           best_delta = act_delta;
6559         }
6560       else
6561         iv_ca_delta_free (&act_delta);
6562     }
6563
6564   if (best_cost.infinite_cost_p ())
6565     {
6566       for (i = 0; i < group->n_map_members; i++)
6567         {
6568           cp = group->cost_map + i;
6569           cand = cp->cand;
6570           if (!cand)
6571             continue;
6572
6573           /* Already tried this.  */
6574           if (cand->important)
6575             {
6576               if (originalp && cand->pos == IP_ORIGINAL)
6577                 continue;
6578               if (!originalp && cand->iv->base_object == NULL_TREE)
6579                 continue;
6580             }
6581
6582           if (iv_ca_cand_used_p (ivs, cand))
6583             continue;
6584
6585           act_delta = NULL;
6586           iv_ca_set_cp (data, ivs, group, cp);
6587           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6588           iv_ca_set_no_cp (data, ivs, group);
6589           act_delta = iv_ca_delta_add (group,
6590                                        iv_ca_cand_for_group (ivs, group),
6591                                        cp, act_delta);
6592
6593           if (act_cost < best_cost)
6594             {
6595               best_cost = act_cost;
6596
6597               if (best_delta)
6598                 iv_ca_delta_free (&best_delta);
6599               best_delta = act_delta;
6600             }
6601           else
6602             iv_ca_delta_free (&act_delta);
6603         }
6604     }
6605
6606   iv_ca_delta_commit (data, ivs, best_delta, true);
6607   iv_ca_delta_free (&best_delta);
6608
6609   return !best_cost.infinite_cost_p ();
6610 }
6611
6612 /* Finds an initial assignment of candidates to uses.  */
6613
6614 static struct iv_ca *
6615 get_initial_solution (struct ivopts_data *data, bool originalp)
6616 {
6617   unsigned i;
6618   struct iv_ca *ivs = iv_ca_new (data);
6619
6620   for (i = 0; i < data->vgroups.length (); i++)
6621     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6622       {
6623         iv_ca_free (&ivs);
6624         return NULL;
6625       }
6626
6627   return ivs;
6628 }
6629
6630 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6631    points to a bool variable, this function tries to break local
6632    optimal fixed-point by replacing candidates in IVS if it's true.  */
6633
6634 static bool
6635 try_improve_iv_set (struct ivopts_data *data,
6636                     struct iv_ca *ivs, bool *try_replace_p)
6637 {
6638   unsigned i, n_ivs;
6639   comp_cost acost, best_cost = iv_ca_cost (ivs);
6640   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6641   struct iv_cand *cand;
6642
6643   /* Try extending the set of induction variables by one.  */
6644   for (i = 0; i < data->vcands.length (); i++)
6645     {
6646       cand = data->vcands[i];
6647
6648       if (iv_ca_cand_used_p (ivs, cand))
6649         continue;
6650
6651       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6652       if (!act_delta)
6653         continue;
6654
6655       /* If we successfully added the candidate and the set is small enough,
6656          try optimizing it by removing other candidates.  */
6657       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6658         {
6659           iv_ca_delta_commit (data, ivs, act_delta, true);
6660           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6661           iv_ca_delta_commit (data, ivs, act_delta, false);
6662           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6663         }
6664
6665       if (acost < best_cost)
6666         {
6667           best_cost = acost;
6668           iv_ca_delta_free (&best_delta);
6669           best_delta = act_delta;
6670         }
6671       else
6672         iv_ca_delta_free (&act_delta);
6673     }
6674
6675   if (!best_delta)
6676     {
6677       /* Try removing the candidates from the set instead.  */
6678       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6679
6680       if (!best_delta && *try_replace_p)
6681         {
6682           *try_replace_p = false;
6683           /* So far candidate selecting algorithm tends to choose fewer IVs
6684              so that it can handle cases in which loops have many variables
6685              but the best choice is often to use only one general biv.  One
6686              weakness is it can't handle opposite cases, in which different
6687              candidates should be chosen with respect to each use.  To solve
6688              the problem, we replace candidates in a manner described by the
6689              comments of iv_ca_replace, thus give general algorithm a chance
6690              to break local optimal fixed-point in these cases.  */
6691           best_cost = iv_ca_replace (data, ivs, &best_delta);
6692         }
6693
6694       if (!best_delta)
6695         return false;
6696     }
6697
6698   iv_ca_delta_commit (data, ivs, best_delta, true);
6699   iv_ca_delta_free (&best_delta);
6700   return best_cost == iv_ca_cost (ivs);
6701 }
6702
6703 /* Attempts to find the optimal set of induction variables.  We do simple
6704    greedy heuristic -- we try to replace at most one candidate in the selected
6705    solution and remove the unused ivs while this improves the cost.  */
6706
6707 static struct iv_ca *
6708 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6709 {
6710   struct iv_ca *set;
6711   bool try_replace_p = true;
6712
6713   /* Get the initial solution.  */
6714   set = get_initial_solution (data, originalp);
6715   if (!set)
6716     {
6717       if (dump_file && (dump_flags & TDF_DETAILS))
6718         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6719       return NULL;
6720     }
6721
6722   if (dump_file && (dump_flags & TDF_DETAILS))
6723     {
6724       fprintf (dump_file, "Initial set of candidates:\n");
6725       iv_ca_dump (data, dump_file, set);
6726     }
6727
6728   while (try_improve_iv_set (data, set, &try_replace_p))
6729     {
6730       if (dump_file && (dump_flags & TDF_DETAILS))
6731         {
6732           fprintf (dump_file, "Improved to:\n");
6733           iv_ca_dump (data, dump_file, set);
6734         }
6735     }
6736
6737   /* If the set has infinite_cost, it can't be optimal.  */
6738   if (iv_ca_cost (set).infinite_cost_p ())
6739     {
6740       if (dump_file && (dump_flags & TDF_DETAILS))
6741         fprintf (dump_file,
6742                  "Overflow to infinite cost in try_improve_iv_set.\n");
6743       iv_ca_free (&set);
6744     }
6745   return set;
6746 }
6747
6748 static struct iv_ca *
6749 find_optimal_iv_set (struct ivopts_data *data)
6750 {
6751   unsigned i;
6752   comp_cost cost, origcost;
6753   struct iv_ca *set, *origset;
6754
6755   /* Determine the cost based on a strategy that starts with original IVs,
6756      and try again using a strategy that prefers candidates not based
6757      on any IVs.  */
6758   origset = find_optimal_iv_set_1 (data, true);
6759   set = find_optimal_iv_set_1 (data, false);
6760
6761   if (!origset && !set)
6762     return NULL;
6763
6764   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6765   cost = set ? iv_ca_cost (set) : infinite_cost;
6766
6767   if (dump_file && (dump_flags & TDF_DETAILS))
6768     {
6769       fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
6770                origcost.cost, origcost.complexity);
6771       fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
6772                cost.cost, cost.complexity);
6773     }
6774
6775   /* Choose the one with the best cost.  */
6776   if (origcost <= cost)
6777     {
6778       if (set)
6779         iv_ca_free (&set);
6780       set = origset;
6781     }
6782   else if (origset)
6783     iv_ca_free (&origset);
6784
6785   for (i = 0; i < data->vgroups.length (); i++)
6786     {
6787       struct iv_group *group = data->vgroups[i];
6788       group->selected = iv_ca_cand_for_group (set, group)->cand;
6789     }
6790
6791   return set;
6792 }
6793
6794 /* Creates a new induction variable corresponding to CAND.  */
6795
6796 static void
6797 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6798 {
6799   gimple_stmt_iterator incr_pos;
6800   tree base;
6801   struct iv_use *use;
6802   struct iv_group *group;
6803   bool after = false;
6804
6805   gcc_assert (cand->iv != NULL);
6806
6807   switch (cand->pos)
6808     {
6809     case IP_NORMAL:
6810       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6811       break;
6812
6813     case IP_END:
6814       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6815       after = true;
6816       break;
6817
6818     case IP_AFTER_USE:
6819       after = true;
6820       /* fall through */
6821     case IP_BEFORE_USE:
6822       incr_pos = gsi_for_stmt (cand->incremented_at);
6823       break;
6824
6825     case IP_ORIGINAL:
6826       /* Mark that the iv is preserved.  */
6827       name_info (data, cand->var_before)->preserve_biv = true;
6828       name_info (data, cand->var_after)->preserve_biv = true;
6829
6830       /* Rewrite the increment so that it uses var_before directly.  */
6831       use = find_interesting_uses_op (data, cand->var_after);
6832       group = data->vgroups[use->group_id];
6833       group->selected = cand;
6834       return;
6835     }
6836
6837   gimple_add_tmp_var (cand->var_before);
6838
6839   base = unshare_expr (cand->iv->base);
6840
6841   create_iv (base, unshare_expr (cand->iv->step),
6842              cand->var_before, data->current_loop,
6843              &incr_pos, after, &cand->var_before, &cand->var_after);
6844 }
6845
6846 /* Creates new induction variables described in SET.  */
6847
6848 static void
6849 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6850 {
6851   unsigned i;
6852   struct iv_cand *cand;
6853   bitmap_iterator bi;
6854
6855   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6856     {
6857       cand = data->vcands[i];
6858       create_new_iv (data, cand);
6859     }
6860
6861   if (dump_file && (dump_flags & TDF_DETAILS))
6862     {
6863       fprintf (dump_file, "Selected IV set for loop %d",
6864                data->current_loop->num);
6865       if (data->loop_loc != UNKNOWN_LOCATION)
6866         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6867                  LOCATION_LINE (data->loop_loc));
6868       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6869                avg_loop_niter (data->current_loop));
6870       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6871       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6872         {
6873           cand = data->vcands[i];
6874           dump_cand (dump_file, cand);
6875         }
6876       fprintf (dump_file, "\n");
6877     }
6878 }
6879
6880 /* Rewrites USE (definition of iv used in a nonlinear expression)
6881    using candidate CAND.  */
6882
6883 static void
6884 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6885                             struct iv_use *use, struct iv_cand *cand)
6886 {
6887   gassign *ass;
6888   gimple_stmt_iterator bsi;
6889   tree comp, type = get_use_type (use), tgt;
6890
6891   /* An important special case -- if we are asked to express value of
6892      the original iv by itself, just exit; there is no need to
6893      introduce a new computation (that might also need casting the
6894      variable to unsigned and back).  */
6895   if (cand->pos == IP_ORIGINAL
6896       && cand->incremented_at == use->stmt)
6897     {
6898       tree op = NULL_TREE;
6899       enum tree_code stmt_code;
6900
6901       gcc_assert (is_gimple_assign (use->stmt));
6902       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6903
6904       /* Check whether we may leave the computation unchanged.
6905          This is the case only if it does not rely on other
6906          computations in the loop -- otherwise, the computation
6907          we rely upon may be removed in remove_unused_ivs,
6908          thus leading to ICE.  */
6909       stmt_code = gimple_assign_rhs_code (use->stmt);
6910       if (stmt_code == PLUS_EXPR
6911           || stmt_code == MINUS_EXPR
6912           || stmt_code == POINTER_PLUS_EXPR)
6913         {
6914           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6915             op = gimple_assign_rhs2 (use->stmt);
6916           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6917             op = gimple_assign_rhs1 (use->stmt);
6918         }
6919
6920       if (op != NULL_TREE)
6921         {
6922           if (expr_invariant_in_loop_p (data->current_loop, op))
6923             return;
6924           if (TREE_CODE (op) == SSA_NAME)
6925             {
6926               struct iv *iv = get_iv (data, op);
6927               if (iv != NULL && integer_zerop (iv->step))
6928                 return;
6929             }
6930         }
6931     }
6932
6933   switch (gimple_code (use->stmt))
6934     {
6935     case GIMPLE_PHI:
6936       tgt = PHI_RESULT (use->stmt);
6937
6938       /* If we should keep the biv, do not replace it.  */
6939       if (name_info (data, tgt)->preserve_biv)
6940         return;
6941
6942       bsi = gsi_after_labels (gimple_bb (use->stmt));
6943       break;
6944
6945     case GIMPLE_ASSIGN:
6946       tgt = gimple_assign_lhs (use->stmt);
6947       bsi = gsi_for_stmt (use->stmt);
6948       break;
6949
6950     default:
6951       gcc_unreachable ();
6952     }
6953
6954   aff_tree aff_inv, aff_var;
6955   if (!get_computation_aff_1 (data->current_loop, use->stmt,
6956                               use, cand, &aff_inv, &aff_var))
6957     gcc_unreachable ();
6958
6959   unshare_aff_combination (&aff_inv);
6960   unshare_aff_combination (&aff_var);
6961   /* Prefer CSE opportunity than loop invariant by adding offset at last
6962      so that iv_uses have different offsets can be CSEed.  */
6963   poly_widest_int offset = aff_inv.offset;
6964   aff_inv.offset = 0;
6965
6966   gimple_seq stmt_list = NULL, seq = NULL;
6967   tree comp_op1 = aff_combination_to_tree (&aff_inv);
6968   tree comp_op2 = aff_combination_to_tree (&aff_var);
6969   gcc_assert (comp_op1 && comp_op2);
6970
6971   comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6972   gimple_seq_add_seq (&stmt_list, seq);
6973   comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6974   gimple_seq_add_seq (&stmt_list, seq);
6975
6976   if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6977     std::swap (comp_op1, comp_op2);
6978
6979   if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6980     {
6981       comp = fold_build_pointer_plus (comp_op1,
6982                                       fold_convert (sizetype, comp_op2));
6983       comp = fold_build_pointer_plus (comp,
6984                                       wide_int_to_tree (sizetype, offset));
6985     }
6986   else
6987     {
6988       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6989                           fold_convert (TREE_TYPE (comp_op1), comp_op2));
6990       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6991                           wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6992     }
6993
6994   comp = fold_convert (type, comp);
6995   if (!valid_gimple_rhs_p (comp)
6996       || (gimple_code (use->stmt) != GIMPLE_PHI
6997           /* We can't allow re-allocating the stmt as it might be pointed
6998              to still.  */
6999           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7000               >= gimple_num_ops (gsi_stmt (bsi)))))
7001     {
7002       comp = force_gimple_operand (comp, &seq, true, NULL);
7003       gimple_seq_add_seq (&stmt_list, seq);
7004       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7005         {
7006           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7007           /* As this isn't a plain copy we have to reset alignment
7008              information.  */
7009           if (SSA_NAME_PTR_INFO (comp))
7010             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7011         }
7012     }
7013
7014   gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7015   if (gimple_code (use->stmt) == GIMPLE_PHI)
7016     {
7017       ass = gimple_build_assign (tgt, comp);
7018       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7019
7020       bsi = gsi_for_stmt (use->stmt);
7021       remove_phi_node (&bsi, false);
7022     }
7023   else
7024     {
7025       gimple_assign_set_rhs_from_tree (&bsi, comp);
7026       use->stmt = gsi_stmt (bsi);
7027     }
7028 }
7029
7030 /* Performs a peephole optimization to reorder the iv update statement with
7031    a mem ref to enable instruction combining in later phases. The mem ref uses
7032    the iv value before the update, so the reordering transformation requires
7033    adjustment of the offset. CAND is the selected IV_CAND.
7034
7035    Example:
7036
7037    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7038    iv2 = iv1 + 1;
7039
7040    if (t < val)      (1)
7041      goto L;
7042    goto Head;
7043
7044
7045    directly propagating t over to (1) will introduce overlapping live range
7046    thus increase register pressure. This peephole transform it into:
7047
7048
7049    iv2 = iv1 + 1;
7050    t = MEM_REF (base, iv2, 8, 8);
7051    if (t < val)
7052      goto L;
7053    goto Head;
7054 */
7055
7056 static void
7057 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7058 {
7059   tree var_after;
7060   gimple *iv_update, *stmt;
7061   basic_block bb;
7062   gimple_stmt_iterator gsi, gsi_iv;
7063
7064   if (cand->pos != IP_NORMAL)
7065     return;
7066
7067   var_after = cand->var_after;
7068   iv_update = SSA_NAME_DEF_STMT (var_after);
7069
7070   bb = gimple_bb (iv_update);
7071   gsi = gsi_last_nondebug_bb (bb);
7072   stmt = gsi_stmt (gsi);
7073
7074   /* Only handle conditional statement for now.  */
7075   if (gimple_code (stmt) != GIMPLE_COND)
7076     return;
7077
7078   gsi_prev_nondebug (&gsi);
7079   stmt = gsi_stmt (gsi);
7080   if (stmt != iv_update)
7081     return;
7082
7083   gsi_prev_nondebug (&gsi);
7084   if (gsi_end_p (gsi))
7085     return;
7086
7087   stmt = gsi_stmt (gsi);
7088   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7089     return;
7090
7091   if (stmt != use->stmt)
7092     return;
7093
7094   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7095     return;
7096
7097   if (dump_file && (dump_flags & TDF_DETAILS))
7098     {
7099       fprintf (dump_file, "Reordering \n");
7100       print_gimple_stmt (dump_file, iv_update, 0);
7101       print_gimple_stmt (dump_file, use->stmt, 0);
7102       fprintf (dump_file, "\n");
7103     }
7104
7105   gsi = gsi_for_stmt (use->stmt);
7106   gsi_iv = gsi_for_stmt (iv_update);
7107   gsi_move_before (&gsi_iv, &gsi);
7108
7109   cand->pos = IP_BEFORE_USE;
7110   cand->incremented_at = use->stmt;
7111 }
7112
7113 /* Return the alias pointer type that should be used for a MEM_REF
7114    associated with USE, which has type USE_PTR_ADDRESS.  */
7115
7116 static tree
7117 get_alias_ptr_type_for_ptr_address (iv_use *use)
7118 {
7119   gcall *call = as_a <gcall *> (use->stmt);
7120   switch (gimple_call_internal_fn (call))
7121     {
7122     case IFN_MASK_LOAD:
7123     case IFN_MASK_STORE:
7124     case IFN_MASK_LOAD_LANES:
7125     case IFN_MASK_STORE_LANES:
7126       /* The second argument contains the correct alias type.  */
7127       gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
7128       return TREE_TYPE (gimple_call_arg (call, 1));
7129
7130     default:
7131       gcc_unreachable ();
7132     }
7133 }
7134
7135
7136 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7137
7138 static void
7139 rewrite_use_address (struct ivopts_data *data,
7140                      struct iv_use *use, struct iv_cand *cand)
7141 {
7142   aff_tree aff;
7143   bool ok;
7144
7145   adjust_iv_update_pos (cand, use);
7146   ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7147   gcc_assert (ok);
7148   unshare_aff_combination (&aff);
7149
7150   /* To avoid undefined overflow problems, all IV candidates use unsigned
7151      integer types.  The drawback is that this makes it impossible for
7152      create_mem_ref to distinguish an IV that is based on a memory object
7153      from one that represents simply an offset.
7154
7155      To work around this problem, we pass a hint to create_mem_ref that
7156      indicates which variable (if any) in aff is an IV based on a memory
7157      object.  Note that we only consider the candidate.  If this is not
7158      based on an object, the base of the reference is in some subexpression
7159      of the use -- but these will use pointer types, so they are recognized
7160      by the create_mem_ref heuristics anyway.  */
7161   tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7162   tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7163   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7164   tree type = use->mem_type;
7165   tree alias_ptr_type;
7166   if (use->type == USE_PTR_ADDRESS)
7167     alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7168   else
7169     {
7170       gcc_assert (type == TREE_TYPE (*use->op_p));
7171       unsigned int align = get_object_alignment (*use->op_p);
7172       if (align != TYPE_ALIGN (type))
7173         type = build_aligned_type (type, align);
7174       alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7175     }
7176   tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7177                              iv, base_hint, data->speed);
7178
7179   if (use->type == USE_PTR_ADDRESS)
7180     {
7181       ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7182       ref = fold_convert (get_use_type (use), ref);
7183       ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7184                                       true, GSI_SAME_STMT);
7185     }
7186   else
7187     copy_ref_info (ref, *use->op_p);
7188
7189   *use->op_p = ref;
7190 }
7191
7192 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7193    candidate CAND.  */
7194
7195 static void
7196 rewrite_use_compare (struct ivopts_data *data,
7197                      struct iv_use *use, struct iv_cand *cand)
7198 {
7199   tree comp, op, bound;
7200   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7201   enum tree_code compare;
7202   struct iv_group *group = data->vgroups[use->group_id];
7203   struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7204
7205   bound = cp->value;
7206   if (bound)
7207     {
7208       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7209       tree var_type = TREE_TYPE (var);
7210       gimple_seq stmts;
7211
7212       if (dump_file && (dump_flags & TDF_DETAILS))
7213         {
7214           fprintf (dump_file, "Replacing exit test: ");
7215           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7216         }
7217       compare = cp->comp;
7218       bound = unshare_expr (fold_convert (var_type, bound));
7219       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7220       if (stmts)
7221         gsi_insert_seq_on_edge_immediate (
7222                 loop_preheader_edge (data->current_loop),
7223                 stmts);
7224
7225       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7226       gimple_cond_set_lhs (cond_stmt, var);
7227       gimple_cond_set_code (cond_stmt, compare);
7228       gimple_cond_set_rhs (cond_stmt, op);
7229       return;
7230     }
7231
7232   /* The induction variable elimination failed; just express the original
7233      giv.  */
7234   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7235   gcc_assert (comp != NULL_TREE);
7236   gcc_assert (use->op_p != NULL);
7237   *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7238                                          SSA_NAME_VAR (*use->op_p),
7239                                          true, GSI_SAME_STMT);
7240 }
7241
7242 /* Rewrite the groups using the selected induction variables.  */
7243
7244 static void
7245 rewrite_groups (struct ivopts_data *data)
7246 {
7247   unsigned i, j;
7248
7249   for (i = 0; i < data->vgroups.length (); i++)
7250     {
7251       struct iv_group *group = data->vgroups[i];
7252       struct iv_cand *cand = group->selected;
7253
7254       gcc_assert (cand);
7255
7256       if (group->type == USE_NONLINEAR_EXPR)
7257         {
7258           for (j = 0; j < group->vuses.length (); j++)
7259             {
7260               rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7261               update_stmt (group->vuses[j]->stmt);
7262             }
7263         }
7264       else if (address_p (group->type))
7265         {
7266           for (j = 0; j < group->vuses.length (); j++)
7267             {
7268               rewrite_use_address (data, group->vuses[j], cand);
7269               update_stmt (group->vuses[j]->stmt);
7270             }
7271         }
7272       else
7273         {
7274           gcc_assert (group->type == USE_COMPARE);
7275
7276           for (j = 0; j < group->vuses.length (); j++)
7277             {
7278               rewrite_use_compare (data, group->vuses[j], cand);
7279               update_stmt (group->vuses[j]->stmt);
7280             }
7281         }
7282     }
7283 }
7284
7285 /* Removes the ivs that are not used after rewriting.  */
7286
7287 static void
7288 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7289 {
7290   unsigned j;
7291   bitmap_iterator bi;
7292
7293   /* Figure out an order in which to release SSA DEFs so that we don't
7294      release something that we'd have to propagate into a debug stmt
7295      afterwards.  */
7296   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7297     {
7298       struct version_info *info;
7299
7300       info = ver_info (data, j);
7301       if (info->iv
7302           && !integer_zerop (info->iv->step)
7303           && !info->inv_id
7304           && !info->iv->nonlin_use
7305           && !info->preserve_biv)
7306         {
7307           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7308
7309           tree def = info->iv->ssa_name;
7310
7311           if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7312             {
7313               imm_use_iterator imm_iter;
7314               use_operand_p use_p;
7315               gimple *stmt;
7316               int count = 0;
7317
7318               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7319                 {
7320                   if (!gimple_debug_bind_p (stmt))
7321                     continue;
7322
7323                   /* We just want to determine whether to do nothing
7324                      (count == 0), to substitute the computed
7325                      expression into a single use of the SSA DEF by
7326                      itself (count == 1), or to use a debug temp
7327                      because the SSA DEF is used multiple times or as
7328                      part of a larger expression (count > 1). */
7329                   count++;
7330                   if (gimple_debug_bind_get_value (stmt) != def)
7331                     count++;
7332
7333                   if (count > 1)
7334                     BREAK_FROM_IMM_USE_STMT (imm_iter);
7335                 }
7336
7337               if (!count)
7338                 continue;
7339
7340               struct iv_use dummy_use;
7341               struct iv_cand *best_cand = NULL, *cand;
7342               unsigned i, best_pref = 0, cand_pref;
7343
7344               memset (&dummy_use, 0, sizeof (dummy_use));
7345               dummy_use.iv = info->iv;
7346               for (i = 0; i < data->vgroups.length () && i < 64; i++)
7347                 {
7348                   cand = data->vgroups[i]->selected;
7349                   if (cand == best_cand)
7350                     continue;
7351                   cand_pref = operand_equal_p (cand->iv->step,
7352                                                info->iv->step, 0)
7353                     ? 4 : 0;
7354                   cand_pref
7355                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7356                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7357                     ? 2 : 0;
7358                   cand_pref
7359                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7360                     ? 1 : 0;
7361                   if (best_cand == NULL || best_pref < cand_pref)
7362                     {
7363                       best_cand = cand;
7364                       best_pref = cand_pref;
7365                     }
7366                 }
7367
7368               if (!best_cand)
7369                 continue;
7370
7371               tree comp = get_computation_at (data->current_loop,
7372                                               SSA_NAME_DEF_STMT (def),
7373                                               &dummy_use, best_cand);
7374               if (!comp)
7375                 continue;
7376
7377               if (count > 1)
7378                 {
7379                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7380                   DECL_ARTIFICIAL (vexpr) = 1;
7381                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7382                   if (SSA_NAME_VAR (def))
7383                     SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7384                   else
7385                     SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7386                   gdebug *def_temp
7387                     = gimple_build_debug_bind (vexpr, comp, NULL);
7388                   gimple_stmt_iterator gsi;
7389
7390                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7391                     gsi = gsi_after_labels (gimple_bb
7392                                             (SSA_NAME_DEF_STMT (def)));
7393                   else
7394                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7395
7396                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7397                   comp = vexpr;
7398                 }
7399
7400               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7401                 {
7402                   if (!gimple_debug_bind_p (stmt))
7403                     continue;
7404
7405                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7406                     SET_USE (use_p, comp);
7407
7408                   update_stmt (stmt);
7409                 }
7410             }
7411         }
7412     }
7413 }
7414
7415 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7416    for hash_map::traverse.  */
7417
7418 bool
7419 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7420 {
7421   free (value);
7422   return true;
7423 }
7424
7425 /* Frees data allocated by the optimization of a single loop.  */
7426
7427 static void
7428 free_loop_data (struct ivopts_data *data)
7429 {
7430   unsigned i, j;
7431   bitmap_iterator bi;
7432   tree obj;
7433
7434   if (data->niters)
7435     {
7436       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7437       delete data->niters;
7438       data->niters = NULL;
7439     }
7440
7441   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7442     {
7443       struct version_info *info;
7444
7445       info = ver_info (data, i);
7446       info->iv = NULL;
7447       info->has_nonlin_use = false;
7448       info->preserve_biv = false;
7449       info->inv_id = 0;
7450     }
7451   bitmap_clear (data->relevant);
7452   bitmap_clear (data->important_candidates);
7453
7454   for (i = 0; i < data->vgroups.length (); i++)
7455     {
7456       struct iv_group *group = data->vgroups[i];
7457
7458       for (j = 0; j < group->vuses.length (); j++)
7459         free (group->vuses[j]);
7460       group->vuses.release ();
7461
7462       BITMAP_FREE (group->related_cands);
7463       for (j = 0; j < group->n_map_members; j++)
7464         {
7465           if (group->cost_map[j].inv_vars)
7466             BITMAP_FREE (group->cost_map[j].inv_vars);
7467           if (group->cost_map[j].inv_exprs)
7468             BITMAP_FREE (group->cost_map[j].inv_exprs);
7469         }
7470
7471       free (group->cost_map);
7472       free (group);
7473     }
7474   data->vgroups.truncate (0);
7475
7476   for (i = 0; i < data->vcands.length (); i++)
7477     {
7478       struct iv_cand *cand = data->vcands[i];
7479
7480       if (cand->inv_vars)
7481         BITMAP_FREE (cand->inv_vars);
7482       if (cand->inv_exprs)
7483         BITMAP_FREE (cand->inv_exprs);
7484       free (cand);
7485     }
7486   data->vcands.truncate (0);
7487
7488   if (data->version_info_size < num_ssa_names)
7489     {
7490       data->version_info_size = 2 * num_ssa_names;
7491       free (data->version_info);
7492       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7493     }
7494
7495   data->max_inv_var_id = 0;
7496   data->max_inv_expr_id = 0;
7497
7498   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7499     SET_DECL_RTL (obj, NULL_RTX);
7500
7501   decl_rtl_to_reset.truncate (0);
7502
7503   data->inv_expr_tab->empty ();
7504
7505   data->iv_common_cand_tab->empty ();
7506   data->iv_common_cands.truncate (0);
7507 }
7508
7509 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7510    loop tree.  */
7511
7512 static void
7513 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7514 {
7515   free_loop_data (data);
7516   free (data->version_info);
7517   BITMAP_FREE (data->relevant);
7518   BITMAP_FREE (data->important_candidates);
7519
7520   decl_rtl_to_reset.release ();
7521   data->vgroups.release ();
7522   data->vcands.release ();
7523   delete data->inv_expr_tab;
7524   data->inv_expr_tab = NULL;
7525   free_affine_expand_cache (&data->name_expansion_cache);
7526   delete data->iv_common_cand_tab;
7527   data->iv_common_cand_tab = NULL;
7528   data->iv_common_cands.release ();
7529   obstack_free (&data->iv_obstack, NULL);
7530 }
7531
7532 /* Returns true if the loop body BODY includes any function calls.  */
7533
7534 static bool
7535 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7536 {
7537   gimple_stmt_iterator gsi;
7538   unsigned i;
7539
7540   for (i = 0; i < num_nodes; i++)
7541     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7542       {
7543         gimple *stmt = gsi_stmt (gsi);
7544         if (is_gimple_call (stmt)
7545             && !gimple_call_internal_p (stmt)
7546             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7547           return true;
7548       }
7549   return false;
7550 }
7551
7552 /* Determine cost scaling factor for basic blocks in loop.  */
7553 #define COST_SCALING_FACTOR_BOUND (20)
7554
7555 static void
7556 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
7557 {
7558   int lfreq = data->current_loop->header->count.to_frequency (cfun);
7559   if (!data->speed || lfreq <= 0)
7560     return;
7561
7562   int max_freq = lfreq;
7563   for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7564     {
7565       body[i]->aux = (void *)(intptr_t) 1;
7566       if (max_freq < body[i]->count.to_frequency (cfun))
7567         max_freq = body[i]->count.to_frequency (cfun);
7568     }
7569   if (max_freq > lfreq)
7570     {
7571       int divisor, factor;
7572       /* Check if scaling factor itself needs to be scaled by the bound.  This
7573          is to avoid overflow when scaling cost according to profile info.  */
7574       if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
7575         {
7576           divisor = max_freq;
7577           factor = COST_SCALING_FACTOR_BOUND;
7578         }
7579       else
7580         {
7581           divisor = lfreq;
7582           factor = 1;
7583         }
7584       for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7585         {
7586           int bfreq = body[i]->count.to_frequency (cfun);
7587           if (bfreq <= lfreq)
7588             continue;
7589
7590           body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
7591         }
7592     }
7593 }
7594
7595 /* Optimizes the LOOP.  Returns true if anything changed.  */
7596
7597 static bool
7598 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
7599                            bitmap toremove)
7600 {
7601   bool changed = false;
7602   struct iv_ca *iv_ca;
7603   edge exit = single_dom_exit (loop);
7604   basic_block *body;
7605
7606   gcc_assert (!data->niters);
7607   data->current_loop = loop;
7608   data->loop_loc = find_loop_location (loop).get_location_t ();
7609   data->speed = optimize_loop_for_speed_p (loop);
7610
7611   if (dump_file && (dump_flags & TDF_DETAILS))
7612     {
7613       fprintf (dump_file, "Processing loop %d", loop->num);
7614       if (data->loop_loc != UNKNOWN_LOCATION)
7615         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7616                  LOCATION_LINE (data->loop_loc));
7617       fprintf (dump_file, "\n");
7618
7619       if (exit)
7620         {
7621           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7622                    exit->src->index, exit->dest->index);
7623           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7624           fprintf (dump_file, "\n");
7625         }
7626
7627       fprintf (dump_file, "\n");
7628     }
7629
7630   body = get_loop_body (loop);
7631   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7632   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7633
7634   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7635
7636   /* For each ssa name determines whether it behaves as an induction variable
7637      in some loop.  */
7638   if (!find_induction_variables (data))
7639     goto finish;
7640
7641   /* Finds interesting uses (item 1).  */
7642   find_interesting_uses (data);
7643   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7644     goto finish;
7645
7646   /* Determine cost scaling factor for basic blocks in loop.  */
7647   determine_scaling_factor (data, body);
7648
7649   /* Finds candidates for the induction variables (item 2).  */
7650   find_iv_candidates (data);
7651
7652   /* Calculates the costs (item 3, part 1).  */
7653   determine_iv_costs (data);
7654   determine_group_iv_costs (data);
7655   determine_set_costs (data);
7656
7657   /* Find the optimal set of induction variables (item 3, part 2).  */
7658   iv_ca = find_optimal_iv_set (data);
7659   /* Cleanup basic block aux field.  */
7660   for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7661     body[i]->aux = NULL;
7662   if (!iv_ca)
7663     goto finish;
7664   changed = true;
7665
7666   /* Create the new induction variables (item 4, part 1).  */
7667   create_new_ivs (data, iv_ca);
7668   iv_ca_free (&iv_ca);
7669
7670   /* Rewrite the uses (item 4, part 2).  */
7671   rewrite_groups (data);
7672
7673   /* Remove the ivs that are unused after rewriting.  */
7674   remove_unused_ivs (data, toremove);
7675
7676 finish:
7677   free (body);
7678   free_loop_data (data);
7679
7680   return changed;
7681 }
7682
7683 /* Main entry point.  Optimizes induction variables in loops.  */
7684
7685 void
7686 tree_ssa_iv_optimize (void)
7687 {
7688   struct loop *loop;
7689   struct ivopts_data data;
7690   auto_bitmap toremove;
7691
7692   tree_ssa_iv_optimize_init (&data);
7693
7694   /* Optimize the loops starting with the innermost ones.  */
7695   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7696     {
7697       if (dump_file && (dump_flags & TDF_DETAILS))
7698         flow_loop_dump (loop, dump_file, NULL, 1);
7699
7700       tree_ssa_iv_optimize_loop (&data, loop, toremove);
7701     }
7702
7703   /* Remove eliminated IV defs.  */
7704   release_defs_bitset (toremove);
7705
7706   /* We have changed the structure of induction variables; it might happen
7707      that definitions in the scev database refer to some of them that were
7708      eliminated.  */
7709   scev_reset_htab ();
7710   /* Likewise niter and control-IV information.  */
7711   free_numbers_of_iterations_estimates (cfun);
7712
7713   tree_ssa_iv_optimize_finalize (&data);
7714 }
7715
7716 #include "gt-tree-ssa-loop-ivopts.h"