gcc/tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
 33    Note the interesting uses are categorized and handled in groups.
 34    Generally, address type uses are grouped together if their iv bases
 35    differ only by a constant offset.
37 2) Candidates for the induction variables are found. This includes
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
 43    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variables
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
 64    All of this is done loop by loop.  Doing it globally is theoretically
 65    possible; it might give better performance and enable us to decide
 66    costs more precisely, but getting all the interactions right would be
 67    complicated.  */
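/* As a purely illustrative sketch of the transformations described above
   (array and variable names are hypothetical), the pass may rewrite a loop
   such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   so that the address itself becomes the induction variable and the
   counter is eliminated, roughly

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   removing the per-iteration computation of &a[i] (strength reduction)
   and the separate counter i (induction variable elimination).  */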
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "memmodel.h"
79 #include "tm_p.h"
80 #include "ssa.h"
81 #include "expmed.h"
82 #include "insn-config.h"
83 #include "emit-rtl.h"
84 #include "recog.h"
85 #include "cgraph.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
90 #include "tree-eh.h"
91 #include "gimplify.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
94 #include "tree-cfg.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
99 #include "explow.h"
100 #include "expr.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
103 #include "cfgloop.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
113 cost of different addressing modes. This should be moved to a TBD
114 interface between the GIMPLE and RTL worlds. */
116 /* The infinite cost. */
117 #define INFTY 1000000000
119 /* Returns the expected number of loop iterations for LOOP.
120 The average trip count is computed from profile data if it
121 exists. */
123 static inline HOST_WIDE_INT
124 avg_loop_niter (struct loop *loop)
126 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
127 if (niter == -1)
129 niter = likely_max_stmt_executions_int (loop);
131 if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
132 return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
135 return niter;
138 struct iv_use;
140 /* Representation of the induction variable. */
141 struct iv
143 tree base; /* Initial value of the iv. */
 144   tree base_object;	/* A memory object to which the induction variable points.  */
145 tree step; /* Step of the iv (constant only). */
146 tree ssa_name; /* The ssa name with the value. */
 147   struct iv_use *nonlin_use;	/* The nonlinear use of this iv, if there is one.  */
148 bool biv_p; /* Is it a biv? */
149 bool no_overflow; /* True if the iv doesn't overflow. */
150 bool have_address_use;/* For biv, indicate if it's used in any address
151 type use. */
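/* For illustration (names hypothetical): for the counter of
   "for (i = 0; i < n; i += 4)", base is 0, step is 4 and biv_p is true,
   with base_object NULL; for a pointer "p" advanced inside the loop, step
   is the number of bytes added per iteration and base_object is the
   object p points to, if it can be determined.  */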
154 /* Per-ssa version information (induction variable descriptions, etc.). */
155 struct version_info
157 tree name; /* The ssa name. */
158 struct iv *iv; /* Induction variable description. */
159 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
160 an expression that is not an induction variable. */
161 bool preserve_biv; /* For the original biv, whether to preserve it. */
162 unsigned inv_id; /* Id of an invariant. */
165 /* Types of uses. */
166 enum use_type
168 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
169 USE_REF_ADDRESS, /* Use is an address for an explicit memory
170 reference. */
171 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
172 cases where the expansion of the function
173 will turn the argument into a normal address. */
174 USE_COMPARE /* Use is a compare. */
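/* For illustration, in a loop body containing "a[i] = b; if (i < n) ...",
   the address computation for a[i] is a USE_REF_ADDRESS use, the test
   "i < n" is a USE_COMPARE use, and an appearance of i in an arbitrary
   arithmetic expression such as "x = i * 2" is a USE_NONLINEAR_EXPR use.
   USE_PTR_ADDRESS covers e.g. a pointer argument to certain builtins
   whose expansion turns that argument into a normal address.  */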
177 /* Cost of a computation. */
178 struct comp_cost
180 comp_cost (): cost (0), complexity (0), scratch (0)
183 comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
184 : cost (cost), complexity (complexity), scratch (scratch)
187 /* Returns true if COST is infinite. */
188 bool infinite_cost_p ();
190 /* Adds costs COST1 and COST2. */
191 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
193 /* Adds COST to the comp_cost. */
194 comp_cost operator+= (comp_cost cost);
196 /* Adds constant C to this comp_cost. */
197 comp_cost operator+= (HOST_WIDE_INT c);
 199   /* Subtracts constant C from this comp_cost.  */
200 comp_cost operator-= (HOST_WIDE_INT c);
202 /* Divide the comp_cost by constant C. */
203 comp_cost operator/= (HOST_WIDE_INT c);
205 /* Multiply the comp_cost by constant C. */
206 comp_cost operator*= (HOST_WIDE_INT c);
208 /* Subtracts costs COST1 and COST2. */
209 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
211 /* Subtracts COST from this comp_cost. */
212 comp_cost operator-= (comp_cost cost);
214 /* Returns true if COST1 is smaller than COST2. */
215 friend bool operator< (comp_cost cost1, comp_cost cost2);
217 /* Returns true if COST1 and COST2 are equal. */
218 friend bool operator== (comp_cost cost1, comp_cost cost2);
 221   /* Returns true if COST1 is smaller than or equal to COST2.  */
221 friend bool operator<= (comp_cost cost1, comp_cost cost2);
223 int64_t cost; /* The runtime cost. */
224 unsigned complexity; /* The estimate of the complexity of the code for
225 the computation (in no concrete units --
226 complexity field should be larger for more
227 complex expressions and addressing modes). */
228 int64_t scratch; /* Scratch used during cost computation. */
231 static const comp_cost no_cost;
232 static const comp_cost infinite_cost (INFTY, 0, INFTY);
234 bool
235 comp_cost::infinite_cost_p ()
237 return cost == INFTY;
240 comp_cost
241 operator+ (comp_cost cost1, comp_cost cost2)
243 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
244 return infinite_cost;
246 gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
247 cost1.cost += cost2.cost;
248 cost1.complexity += cost2.complexity;
250 return cost1;
253 comp_cost
254 operator- (comp_cost cost1, comp_cost cost2)
256 if (cost1.infinite_cost_p ())
257 return infinite_cost;
259 gcc_assert (!cost2.infinite_cost_p ());
260 gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
262 cost1.cost -= cost2.cost;
263 cost1.complexity -= cost2.complexity;
265 return cost1;
268 comp_cost
269 comp_cost::operator+= (comp_cost cost)
271 *this = *this + cost;
272 return *this;
275 comp_cost
276 comp_cost::operator+= (HOST_WIDE_INT c)
278 if (infinite_cost_p ())
279 return *this;
281 gcc_assert (this->cost + c < infinite_cost.cost);
282 this->cost += c;
284 return *this;
287 comp_cost
288 comp_cost::operator-= (HOST_WIDE_INT c)
290 if (infinite_cost_p ())
291 return *this;
293 gcc_assert (this->cost - c < infinite_cost.cost);
294 this->cost -= c;
296 return *this;
299 comp_cost
300 comp_cost::operator/= (HOST_WIDE_INT c)
302 gcc_assert (c != 0);
303 if (infinite_cost_p ())
304 return *this;
306 this->cost /= c;
308 return *this;
311 comp_cost
312 comp_cost::operator*= (HOST_WIDE_INT c)
314 if (infinite_cost_p ())
315 return *this;
317 gcc_assert (this->cost * c < infinite_cost.cost);
318 this->cost *= c;
320 return *this;
323 comp_cost
324 comp_cost::operator-= (comp_cost cost)
326 *this = *this - cost;
327 return *this;
330 bool
331 operator< (comp_cost cost1, comp_cost cost2)
333 if (cost1.cost == cost2.cost)
334 return cost1.complexity < cost2.complexity;
336 return cost1.cost < cost2.cost;
339 bool
340 operator== (comp_cost cost1, comp_cost cost2)
342 return cost1.cost == cost2.cost
343 && cost1.complexity == cost2.complexity;
346 bool
347 operator<= (comp_cost cost1, comp_cost cost2)
349 return cost1 < cost2 || cost1 == cost2;
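/* For illustration: a cost of {cost = 4, complexity = 1} compares smaller
   than {cost = 4, complexity = 2} because the runtime costs tie and the
   first has lower complexity, while {cost = 3, complexity = 5} is smaller
   than both since runtime cost is compared first.  Adding any cost to
   infinite_cost yields infinite_cost.  */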
352 struct iv_inv_expr_ent;
354 /* The candidate - cost pair. */
355 struct cost_pair
357 struct iv_cand *cand; /* The candidate. */
358 comp_cost cost; /* The cost. */
359 enum tree_code comp; /* For iv elimination, the comparison. */
360 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
361 preserved when representing iv_use with iv_cand. */
362 bitmap inv_exprs; /* The list of newly created invariant expressions
363 when representing iv_use with iv_cand. */
364 tree value; /* For final value elimination, the expression for
365 the final value of the iv. For iv elimination,
366 the new bound to compare with. */
369 /* Use. */
370 struct iv_use
372 unsigned id; /* The id of the use. */
373 unsigned group_id; /* The group id the use belongs to. */
374 enum use_type type; /* Type of the use. */
375 tree mem_type; /* The memory type to use when testing whether an
376 address is legitimate, and what the address's
377 cost is. */
378 struct iv *iv; /* The induction variable it is based on. */
379 gimple *stmt; /* Statement in that it occurs. */
380 tree *op_p; /* The place where it occurs. */
382 tree addr_base; /* Base address with const offset stripped. */
383 poly_uint64_pod addr_offset;
384 /* Const offset stripped from base address. */
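/* For illustration (names hypothetical): for an address use whose iv base
   is "p_1 + 16", addr_base is "p_1" and addr_offset is 16; address uses
   that agree in addr_base and step but differ in addr_offset are
   candidates for being placed in the same group.  */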
387 /* Group of uses. */
388 struct iv_group
390 /* The id of the group. */
391 unsigned id;
392 /* Uses of the group are of the same type. */
393 enum use_type type;
394 /* The set of "related" IV candidates, plus the important ones. */
395 bitmap related_cands;
396 /* Number of IV candidates in the cost_map. */
397 unsigned n_map_members;
 399   /* The costs w.r.t. the iv candidates.  */
399 struct cost_pair *cost_map;
400 /* The selected candidate for the group. */
401 struct iv_cand *selected;
402 /* Uses in the group. */
403 vec<struct iv_use *> vuses;
406 /* The position where the iv is computed. */
407 enum iv_position
409 IP_NORMAL, /* At the end, just before the exit condition. */
410 IP_END, /* At the end of the latch block. */
411 IP_BEFORE_USE, /* Immediately before a specific use. */
412 IP_AFTER_USE, /* Immediately after a specific use. */
413 IP_ORIGINAL /* The original biv. */
416 /* The induction variable candidate. */
417 struct iv_cand
419 unsigned id; /* The number of the candidate. */
420 bool important; /* Whether this is an "important" candidate, i.e. such
421 that it should be considered by all uses. */
422 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
423 gimple *incremented_at;/* For original biv, the statement where it is
424 incremented. */
425 tree var_before; /* The variable used for it before increment. */
426 tree var_after; /* The variable used for it after increment. */
427 struct iv *iv; /* The value of the candidate. NULL for
428 "pseudocandidate" used to indicate the possibility
429 to replace the final value of an iv by direct
430 computation of the value. */
431 unsigned cost; /* Cost of the candidate. */
432 unsigned cost_step; /* Cost of the candidate's increment operation. */
433 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
434 where it is incremented. */
435 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
436 iv_cand. */
437 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
 438 				   handle it as a new invariant expression which will
439 be hoisted out of loop. */
440 struct iv *orig_iv; /* The original iv if this cand is added from biv with
441 smaller type. */
444 /* Hashtable entry for common candidate derived from iv uses. */
445 struct iv_common_cand
447 tree base;
448 tree step;
449 /* IV uses from which this common candidate is derived. */
450 auto_vec<struct iv_use *> uses;
451 hashval_t hash;
454 /* Hashtable helpers. */
456 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
458 static inline hashval_t hash (const iv_common_cand *);
459 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
462 /* Hash function for possible common candidates. */
464 inline hashval_t
465 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
467 return ccand->hash;
470 /* Hash table equality function for common candidates. */
472 inline bool
473 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
474 const iv_common_cand *ccand2)
476 return (ccand1->hash == ccand2->hash
477 && operand_equal_p (ccand1->base, ccand2->base, 0)
478 && operand_equal_p (ccand1->step, ccand2->step, 0)
479 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
480 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
483 /* Loop invariant expression hashtable entry. */
485 struct iv_inv_expr_ent
487 /* Tree expression of the entry. */
488 tree expr;
 489   /* Unique identifier.  */
490 int id;
491 /* Hash value. */
492 hashval_t hash;
495 /* Sort iv_inv_expr_ent pair A and B by id field. */
497 static int
498 sort_iv_inv_expr_ent (const void *a, const void *b)
500 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
501 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
503 unsigned id1 = (*e1)->id;
504 unsigned id2 = (*e2)->id;
506 if (id1 < id2)
507 return -1;
508 else if (id1 > id2)
509 return 1;
510 else
511 return 0;
514 /* Hashtable helpers. */
516 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
518 static inline hashval_t hash (const iv_inv_expr_ent *);
519 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
522 /* Return true if uses of type TYPE represent some form of address. */
524 inline bool
525 address_p (use_type type)
527 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
530 /* Hash function for loop invariant expressions. */
532 inline hashval_t
533 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
535 return expr->hash;
538 /* Hash table equality function for expressions. */
540 inline bool
541 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
542 const iv_inv_expr_ent *expr2)
544 return expr1->hash == expr2->hash
545 && operand_equal_p (expr1->expr, expr2->expr, 0);
548 struct ivopts_data
550 /* The currently optimized loop. */
551 struct loop *current_loop;
552 location_t loop_loc;
554 /* Numbers of iterations for all exits of the current loop. */
555 hash_map<edge, tree_niter_desc *> *niters;
557 /* Number of registers used in it. */
558 unsigned regs_used;
560 /* The size of version_info array allocated. */
561 unsigned version_info_size;
563 /* The array of information for the ssa names. */
564 struct version_info *version_info;
566 /* The hashtable of loop invariant expressions created
567 by ivopt. */
568 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
570 /* The bitmap of indices in version_info whose value was changed. */
571 bitmap relevant;
573 /* The uses of induction variables. */
574 vec<iv_group *> vgroups;
576 /* The candidates. */
577 vec<iv_cand *> vcands;
579 /* A bitmap of important candidates. */
580 bitmap important_candidates;
582 /* Cache used by tree_to_aff_combination_expand. */
583 hash_map<tree, name_expansion *> *name_expansion_cache;
585 /* The hashtable of common candidates derived from iv uses. */
586 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
588 /* The common candidates. */
589 vec<iv_common_cand *> iv_common_cands;
591 /* The maximum invariant variable id. */
592 unsigned max_inv_var_id;
594 /* The maximum invariant expression id. */
595 unsigned max_inv_expr_id;
597 /* Number of no_overflow BIVs which are not used in memory address. */
598 unsigned bivs_not_used_in_addr;
600 /* Obstack for iv structure. */
601 struct obstack iv_obstack;
603 /* Whether to consider just related and important candidates when replacing a
604 use. */
605 bool consider_all_candidates;
607 /* Are we optimizing for speed? */
608 bool speed;
610 /* Whether the loop body includes any function calls. */
611 bool body_includes_call;
613 /* Whether the loop body can only be exited via single exit. */
614 bool loop_single_exit_p;
617 /* An assignment of iv candidates to uses. */
619 struct iv_ca
621 /* The number of uses covered by the assignment. */
622 unsigned upto;
624 /* Number of uses that cannot be expressed by the candidates in the set. */
625 unsigned bad_groups;
627 /* Candidate assigned to a use, together with the related costs. */
628 struct cost_pair **cand_for_group;
630 /* Number of times each candidate is used. */
631 unsigned *n_cand_uses;
633 /* The candidates used. */
634 bitmap cands;
636 /* The number of candidates in the set. */
637 unsigned n_cands;
639 /* The number of invariants needed, including both invariant variants and
640 invariant expressions. */
641 unsigned n_invs;
643 /* Total cost of expressing uses. */
644 comp_cost cand_use_cost;
646 /* Total cost of candidates. */
647 int64_t cand_cost;
649 /* Number of times each invariant variable is used. */
650 unsigned *n_inv_var_uses;
652 /* Number of times each invariant expression is used. */
653 unsigned *n_inv_expr_uses;
655 /* Total cost of the assignment. */
656 comp_cost cost;
659 /* Difference of two iv candidate assignments. */
661 struct iv_ca_delta
663 /* Changed group. */
664 struct iv_group *group;
666 /* An old assignment (for rollback purposes). */
667 struct cost_pair *old_cp;
669 /* A new assignment. */
670 struct cost_pair *new_cp;
672 /* Next change in the list. */
673 struct iv_ca_delta *next;
676 /* Bound on number of candidates below that all candidates are considered. */
678 #define CONSIDER_ALL_CANDIDATES_BOUND \
679 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
681 /* If there are more iv occurrences, we just give up (it is quite unlikely that
682 optimizing such a loop would help, and it would take ages). */
684 #define MAX_CONSIDERED_GROUPS \
685 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 687 /* If there are at most this number of ivs in the set, always try removing
 688    unnecessary ivs from the set.  */
690 #define ALWAYS_PRUNE_CAND_SET_BOUND \
691 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 693 /* The list of trees for which the decl_rtl field must be reset is stored
694 here. */
696 static vec<tree> decl_rtl_to_reset;
698 static comp_cost force_expr_to_var_cost (tree, bool);
700 /* The single loop exit if it dominates the latch, NULL otherwise. */
702 edge
703 single_dom_exit (struct loop *loop)
705 edge exit = single_exit (loop);
707 if (!exit)
708 return NULL;
710 if (!just_once_each_iteration_p (loop, exit->src))
711 return NULL;
713 return exit;
716 /* Dumps information about the induction variable IV to FILE. Don't dump
717 variable's name if DUMP_NAME is FALSE. The information is dumped with
718 preceding spaces indicated by INDENT_LEVEL. */
720 void
721 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
723 const char *p;
724 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
726 if (indent_level > 4)
727 indent_level = 4;
728 p = spaces + 8 - (indent_level << 1);
730 fprintf (file, "%sIV struct:\n", p);
731 if (iv->ssa_name && dump_name)
733 fprintf (file, "%s SSA_NAME:\t", p);
734 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
735 fprintf (file, "\n");
738 fprintf (file, "%s Type:\t", p);
739 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
740 fprintf (file, "\n");
742 fprintf (file, "%s Base:\t", p);
743 print_generic_expr (file, iv->base, TDF_SLIM);
744 fprintf (file, "\n");
746 fprintf (file, "%s Step:\t", p);
747 print_generic_expr (file, iv->step, TDF_SLIM);
748 fprintf (file, "\n");
750 if (iv->base_object)
752 fprintf (file, "%s Object:\t", p);
753 print_generic_expr (file, iv->base_object, TDF_SLIM);
754 fprintf (file, "\n");
757 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
759 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
760 p, iv->no_overflow ? "No-overflow" : "Overflow");
763 /* Dumps information about the USE to FILE. */
765 void
766 dump_use (FILE *file, struct iv_use *use)
768 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
769 fprintf (file, " At stmt:\t");
770 print_gimple_stmt (file, use->stmt, 0);
771 fprintf (file, " At pos:\t");
772 if (use->op_p)
773 print_generic_expr (file, *use->op_p, TDF_SLIM);
774 fprintf (file, "\n");
775 dump_iv (file, use->iv, false, 2);
778 /* Dumps information about the uses to FILE. */
780 void
781 dump_groups (FILE *file, struct ivopts_data *data)
783 unsigned i, j;
784 struct iv_group *group;
786 for (i = 0; i < data->vgroups.length (); i++)
788 group = data->vgroups[i];
789 fprintf (file, "Group %d:\n", group->id);
790 if (group->type == USE_NONLINEAR_EXPR)
791 fprintf (file, " Type:\tGENERIC\n");
792 else if (group->type == USE_REF_ADDRESS)
793 fprintf (file, " Type:\tREFERENCE ADDRESS\n");
794 else if (group->type == USE_PTR_ADDRESS)
795 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
796 else
798 gcc_assert (group->type == USE_COMPARE);
799 fprintf (file, " Type:\tCOMPARE\n");
801 for (j = 0; j < group->vuses.length (); j++)
802 dump_use (file, group->vuses[j]);
806 /* Dumps information about induction variable candidate CAND to FILE. */
808 void
809 dump_cand (FILE *file, struct iv_cand *cand)
811 struct iv *iv = cand->iv;
813 fprintf (file, "Candidate %d:\n", cand->id);
814 if (cand->inv_vars)
816 fprintf (file, " Depend on inv.vars: ");
817 dump_bitmap (file, cand->inv_vars);
819 if (cand->inv_exprs)
821 fprintf (file, " Depend on inv.exprs: ");
822 dump_bitmap (file, cand->inv_exprs);
825 if (cand->var_before)
 827       fprintf (file, "  Var before: ");
828 print_generic_expr (file, cand->var_before, TDF_SLIM);
829 fprintf (file, "\n");
831 if (cand->var_after)
833 fprintf (file, " Var after: ");
834 print_generic_expr (file, cand->var_after, TDF_SLIM);
835 fprintf (file, "\n");
838 switch (cand->pos)
840 case IP_NORMAL:
841 fprintf (file, " Incr POS: before exit test\n");
842 break;
844 case IP_BEFORE_USE:
845 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
846 break;
848 case IP_AFTER_USE:
849 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
850 break;
852 case IP_END:
853 fprintf (file, " Incr POS: at end\n");
854 break;
856 case IP_ORIGINAL:
857 fprintf (file, " Incr POS: orig biv\n");
858 break;
861 dump_iv (file, iv, false, 1);
864 /* Returns the info for ssa version VER. */
866 static inline struct version_info *
867 ver_info (struct ivopts_data *data, unsigned ver)
869 return data->version_info + ver;
872 /* Returns the info for ssa name NAME. */
874 static inline struct version_info *
875 name_info (struct ivopts_data *data, tree name)
877 return ver_info (data, SSA_NAME_VERSION (name));
880 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
881 emitted in LOOP. */
883 static bool
884 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
886 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
888 gcc_assert (bb);
890 if (sbb == loop->latch)
891 return true;
893 if (sbb != bb)
894 return false;
896 return stmt == last_stmt (bb);
 899 /* Returns true if STMT is after the place where the original induction
900 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
901 if the positions are identical. */
903 static bool
904 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
906 basic_block cand_bb = gimple_bb (cand->incremented_at);
907 basic_block stmt_bb = gimple_bb (stmt);
909 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
910 return false;
912 if (stmt_bb != cand_bb)
913 return true;
915 if (true_if_equal
916 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
917 return true;
918 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 921 /* Returns true if STMT is after the place where the induction variable
922 CAND is incremented in LOOP. */
924 static bool
925 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
927 switch (cand->pos)
929 case IP_END:
930 return false;
932 case IP_NORMAL:
933 return stmt_after_ip_normal_pos (loop, stmt);
935 case IP_ORIGINAL:
936 case IP_AFTER_USE:
937 return stmt_after_inc_pos (cand, stmt, false);
939 case IP_BEFORE_USE:
940 return stmt_after_inc_pos (cand, stmt, true);
942 default:
943 gcc_unreachable ();
947 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
949 static bool
950 abnormal_ssa_name_p (tree exp)
952 if (!exp)
953 return false;
955 if (TREE_CODE (exp) != SSA_NAME)
956 return false;
958 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
961 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
962 abnormal phi node. Callback for for_each_index. */
964 static bool
965 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
966 void *data ATTRIBUTE_UNUSED)
968 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
970 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
971 return false;
972 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
973 return false;
976 return !abnormal_ssa_name_p (*index);
979 /* Returns true if EXPR contains a ssa name that occurs in an
980 abnormal phi node. */
982 bool
983 contains_abnormal_ssa_name_p (tree expr)
985 enum tree_code code;
986 enum tree_code_class codeclass;
988 if (!expr)
989 return false;
991 code = TREE_CODE (expr);
992 codeclass = TREE_CODE_CLASS (code);
994 if (code == CALL_EXPR)
996 tree arg;
997 call_expr_arg_iterator iter;
998 FOR_EACH_CALL_EXPR_ARG (arg, iter, expr)
999 if (contains_abnormal_ssa_name_p (arg))
1000 return true;
1001 return false;
1004 if (code == SSA_NAME)
1005 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
1007 if (code == INTEGER_CST
1008 || is_gimple_min_invariant (expr))
1009 return false;
1011 if (code == ADDR_EXPR)
1012 return !for_each_index (&TREE_OPERAND (expr, 0),
1013 idx_contains_abnormal_ssa_name_p,
1014 NULL);
1016 if (code == COND_EXPR)
1017 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
1018 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
1019 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
1021 switch (codeclass)
1023 case tcc_binary:
1024 case tcc_comparison:
1025 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
1026 return true;
1028 /* Fallthru. */
1029 case tcc_unary:
1030 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
1031 return true;
1033 break;
1035 default:
1036 gcc_unreachable ();
1039 return false;
1042 /* Returns the structure describing number of iterations determined from
1043 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1045 static struct tree_niter_desc *
1046 niter_for_exit (struct ivopts_data *data, edge exit)
1048 struct tree_niter_desc *desc;
1049 tree_niter_desc **slot;
1051 if (!data->niters)
1053 data->niters = new hash_map<edge, tree_niter_desc *>;
1054 slot = NULL;
1056 else
1057 slot = data->niters->get (exit);
1059 if (!slot)
1061 /* Try to determine number of iterations. We cannot safely work with ssa
1062 names that appear in phi nodes on abnormal edges, so that we do not
1063 create overlapping life ranges for them (PR 27283). */
1064 desc = XNEW (struct tree_niter_desc);
1065 if (!number_of_iterations_exit (data->current_loop,
1066 exit, desc, true)
1067 || contains_abnormal_ssa_name_p (desc->niter))
1069 XDELETE (desc);
1070 desc = NULL;
1072 data->niters->put (exit, desc);
1074 else
1075 desc = *slot;
1077 return desc;
1080 /* Returns the structure describing number of iterations determined from
1081 single dominating exit of DATA->current_loop, or NULL if something
1082 goes wrong. */
1084 static struct tree_niter_desc *
1085 niter_for_single_dom_exit (struct ivopts_data *data)
1087 edge exit = single_dom_exit (data->current_loop);
1089 if (!exit)
1090 return NULL;
1092 return niter_for_exit (data, exit);
1095 /* Initializes data structures used by the iv optimization pass, stored
1096 in DATA. */
1098 static void
1099 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1101 data->version_info_size = 2 * num_ssa_names;
1102 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1103 data->relevant = BITMAP_ALLOC (NULL);
1104 data->important_candidates = BITMAP_ALLOC (NULL);
1105 data->max_inv_var_id = 0;
1106 data->max_inv_expr_id = 0;
1107 data->niters = NULL;
1108 data->vgroups.create (20);
1109 data->vcands.create (20);
1110 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1111 data->name_expansion_cache = NULL;
1112 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1113 data->iv_common_cands.create (20);
1114 decl_rtl_to_reset.create (20);
1115 gcc_obstack_init (&data->iv_obstack);
 1118 /* Returns a memory object to which EXPR points.  In case we are able to
1119 determine that it does not point to any such object, NULL is returned. */
1121 static tree
1122 determine_base_object (tree expr)
1124 enum tree_code code = TREE_CODE (expr);
1125 tree base, obj;
1127 /* If this is a pointer casted to any type, we need to determine
1128 the base object for the pointer; so handle conversions before
1129 throwing away non-pointer expressions. */
1130 if (CONVERT_EXPR_P (expr))
1131 return determine_base_object (TREE_OPERAND (expr, 0));
1133 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1134 return NULL_TREE;
1136 switch (code)
1138 case INTEGER_CST:
1139 return NULL_TREE;
1141 case ADDR_EXPR:
1142 obj = TREE_OPERAND (expr, 0);
1143 base = get_base_address (obj);
1145 if (!base)
1146 return expr;
1148 if (TREE_CODE (base) == MEM_REF)
1149 return determine_base_object (TREE_OPERAND (base, 0));
1151 return fold_convert (ptr_type_node,
1152 build_fold_addr_expr (base));
1154 case POINTER_PLUS_EXPR:
1155 return determine_base_object (TREE_OPERAND (expr, 0));
1157 case PLUS_EXPR:
1158 case MINUS_EXPR:
1159 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
1160 gcc_unreachable ();
1162 default:
1163 if (POLY_INT_CST_P (expr))
1164 return NULL_TREE;
1165 return fold_convert (ptr_type_node, expr);
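/* For illustration of typical results: determine_base_object (&a[i])
   yields &a, determine_base_object (p + 16) yields p itself (a pointer
   SSA name acts as its own base object), and an integer constant or a
   non-pointer expression yields NULL_TREE.  */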
1169 /* Return true if address expression with non-DECL_P operand appears
1170 in EXPR. */
1172 static bool
1173 contain_complex_addr_expr (tree expr)
1175 bool res = false;
1177 STRIP_NOPS (expr);
1178 switch (TREE_CODE (expr))
1180 case POINTER_PLUS_EXPR:
1181 case PLUS_EXPR:
1182 case MINUS_EXPR:
1183 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1184 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1185 break;
1187 case ADDR_EXPR:
1188 return (!DECL_P (TREE_OPERAND (expr, 0)));
1190 default:
1191 return false;
1194 return res;
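/* For illustration: "&a[i]" contains a complex address expression because
   the ADDR_EXPR operand is an ARRAY_REF rather than a bare declaration,
   whereas "&a" does not; such bases are lowered to affine form in
   alloc_iv below.  */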
1197 /* Allocates an induction variable with given initial value BASE and step STEP
1198 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1200 static struct iv *
1201 alloc_iv (struct ivopts_data *data, tree base, tree step,
1202 bool no_overflow = false)
1204 tree expr = base;
1205 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1206 sizeof (struct iv));
1207 gcc_assert (step != NULL_TREE);
1209 /* Lower address expression in base except ones with DECL_P as operand.
1210 By doing this:
1211 1) More accurate cost can be computed for address expressions;
1212 2) Duplicate candidates won't be created for bases in different
1213 forms, like &a[0] and &a. */
1214 STRIP_NOPS (expr);
1215 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1216 || contain_complex_addr_expr (expr))
1218 aff_tree comb;
1219 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1220 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1223 iv->base = base;
1224 iv->base_object = determine_base_object (base);
1225 iv->step = step;
1226 iv->biv_p = false;
1227 iv->nonlin_use = NULL;
1228 iv->ssa_name = NULL_TREE;
1229 if (!no_overflow
1230 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1231 base, step))
1232 no_overflow = true;
1233 iv->no_overflow = no_overflow;
1234 iv->have_address_use = false;
1236 return iv;
1239 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1240 doesn't overflow. */
1242 static void
1243 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1244 bool no_overflow)
1246 struct version_info *info = name_info (data, iv);
1248 gcc_assert (!info->iv);
1250 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1251 info->iv = alloc_iv (data, base, step, no_overflow);
1252 info->iv->ssa_name = iv;
1255 /* Finds induction variable declaration for VAR. */
1257 static struct iv *
1258 get_iv (struct ivopts_data *data, tree var)
1260 basic_block bb;
1261 tree type = TREE_TYPE (var);
1263 if (!POINTER_TYPE_P (type)
1264 && !INTEGRAL_TYPE_P (type))
1265 return NULL;
1267 if (!name_info (data, var)->iv)
1269 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1271 if (!bb
1272 || !flow_bb_inside_loop_p (data->current_loop, bb))
1273 set_iv (data, var, var, build_int_cst (type, 0), true);
1276 return name_info (data, var)->iv;
1279 /* Return the first non-invariant ssa var found in EXPR. */
1281 static tree
1282 extract_single_var_from_expr (tree expr)
1284 int i, n;
1285 tree tmp;
1286 enum tree_code code;
1288 if (!expr || is_gimple_min_invariant (expr))
1289 return NULL;
1291 code = TREE_CODE (expr);
1292 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1294 n = TREE_OPERAND_LENGTH (expr);
1295 for (i = 0; i < n; i++)
1297 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1299 if (tmp)
1300 return tmp;
1303 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
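/* For illustration (ssa names hypothetical): for the expression
   "n_1 + i_2 * 4" this returns n_1, the first ssa name encountered while
   walking the operands; for a constant expression it returns NULL.  */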
1306 /* Finds basic ivs. */
1308 static bool
1309 find_bivs (struct ivopts_data *data)
1311 gphi *phi;
1312 affine_iv iv;
1313 tree step, type, base, stop;
1314 bool found = false;
1315 struct loop *loop = data->current_loop;
1316 gphi_iterator psi;
1318 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1320 phi = psi.phi ();
1322 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1323 continue;
1325 if (virtual_operand_p (PHI_RESULT (phi)))
1326 continue;
1328 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1329 continue;
1331 if (integer_zerop (iv.step))
1332 continue;
1334 step = iv.step;
1335 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1336 /* Stop expanding iv base at the first ssa var referred by iv step.
 1337 	 Ideally we should stop at any such ssa var, but since that is expensive
 1338 	 and such cases rarely happen, we only do it for the first one.
1340 See PR64705 for the rationale. */
1341 stop = extract_single_var_from_expr (step);
1342 base = expand_simple_operations (base, stop);
1343 if (contains_abnormal_ssa_name_p (base)
1344 || contains_abnormal_ssa_name_p (step))
1345 continue;
1347 type = TREE_TYPE (PHI_RESULT (phi));
1348 base = fold_convert (type, base);
1349 if (step)
1351 if (POINTER_TYPE_P (type))
1352 step = convert_to_ptrofftype (step);
1353 else
1354 step = fold_convert (type, step);
1357 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1358 found = true;
1361 return found;
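/* For illustration (names hypothetical), a biv discovered above typically
   looks like

     # i_1 = PHI <0(preheader), i_5(latch)>
     ...
     i_5 = i_1 + 1;

   for which set_iv records base 0 and step 1 on i_1.  */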
1364 /* Marks basic ivs. */
1366 static void
1367 mark_bivs (struct ivopts_data *data)
1369 gphi *phi;
1370 gimple *def;
1371 tree var;
1372 struct iv *iv, *incr_iv;
1373 struct loop *loop = data->current_loop;
1374 basic_block incr_bb;
1375 gphi_iterator psi;
1377 data->bivs_not_used_in_addr = 0;
1378 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1380 phi = psi.phi ();
1382 iv = get_iv (data, PHI_RESULT (phi));
1383 if (!iv)
1384 continue;
1386 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1387 def = SSA_NAME_DEF_STMT (var);
 1388       /* Don't mark an iv peeled from another one as a biv.  */
1389 if (def
1390 && gimple_code (def) == GIMPLE_PHI
1391 && gimple_bb (def) == loop->header)
1392 continue;
1394 incr_iv = get_iv (data, var);
1395 if (!incr_iv)
1396 continue;
1398 /* If the increment is in the subloop, ignore it. */
1399 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1400 if (incr_bb->loop_father != data->current_loop
1401 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1402 continue;
1404 iv->biv_p = true;
1405 incr_iv->biv_p = true;
1406 if (iv->no_overflow)
1407 data->bivs_not_used_in_addr++;
1408 if (incr_iv->no_overflow)
1409 data->bivs_not_used_in_addr++;
1413 /* Checks whether STMT defines a linear induction variable and stores its
1414 parameters to IV. */
1416 static bool
1417 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1419 tree lhs, stop;
1420 struct loop *loop = data->current_loop;
1422 iv->base = NULL_TREE;
1423 iv->step = NULL_TREE;
1425 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1426 return false;
1428 lhs = gimple_assign_lhs (stmt);
1429 if (TREE_CODE (lhs) != SSA_NAME)
1430 return false;
1432 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1433 return false;
1435 /* Stop expanding iv base at the first ssa var referred by iv step.
 1436      Ideally we should stop at any such ssa var, but since that is expensive
 1437      and such cases rarely happen, we only do it for the first one.
1439 See PR64705 for the rationale. */
1440 stop = extract_single_var_from_expr (iv->step);
1441 iv->base = expand_simple_operations (iv->base, stop);
1442 if (contains_abnormal_ssa_name_p (iv->base)
1443 || contains_abnormal_ssa_name_p (iv->step))
1444 return false;
1446 /* If STMT could throw, then do not consider STMT as defining a GIV.
1447 While this will suppress optimizations, we cannot safely delete this
1448 GIV and associated statements, even if it appears it is not used. */
1449 if (stmt_could_throw_p (cfun, stmt))
1450 return false;
1452 return true;
1455 /* Finds general ivs in statement STMT. */
1457 static void
1458 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1460 affine_iv iv;
1462 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1463 return;
1465 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1468 /* Finds general ivs in basic block BB. */
1470 static void
1471 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1473 gimple_stmt_iterator bsi;
1475 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1476 find_givs_in_stmt (data, gsi_stmt (bsi));
1479 /* Finds general ivs. */
1481 static void
1482 find_givs (struct ivopts_data *data)
1484 struct loop *loop = data->current_loop;
1485 basic_block *body = get_loop_body_in_dom_order (loop);
1486 unsigned i;
1488 for (i = 0; i < loop->num_nodes; i++)
1489 find_givs_in_bb (data, body[i]);
1490 free (body);
1493 /* For each ssa name defined in LOOP determines whether it is an induction
1494 variable and if so, its initial value and step. */
1496 static bool
1497 find_induction_variables (struct ivopts_data *data)
1499 unsigned i;
1500 bitmap_iterator bi;
1502 if (!find_bivs (data))
1503 return false;
1505 find_givs (data);
1506 mark_bivs (data);
1508 if (dump_file && (dump_flags & TDF_DETAILS))
1510 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1512 if (niter)
1514 fprintf (dump_file, " number of iterations ");
1515 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1516 if (!integer_zerop (niter->may_be_zero))
1518 fprintf (dump_file, "; zero if ");
1519 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1521 fprintf (dump_file, "\n");
1524 fprintf (dump_file, "\n<Induction Vars>:\n");
1525 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1527 struct version_info *info = ver_info (data, i);
1528 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1529 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1533 return true;
1536 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1537 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1538 is the const offset stripped from IV base and MEM_TYPE is the type
1539 of the memory being addressed. For uses of other types, ADDR_BASE
1540 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1542 static struct iv_use *
1543 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1544 gimple *stmt, enum use_type type, tree mem_type,
1545 tree addr_base, poly_uint64 addr_offset)
1547 struct iv_use *use = XCNEW (struct iv_use);
1549 use->id = group->vuses.length ();
1550 use->group_id = group->id;
1551 use->type = type;
1552 use->mem_type = mem_type;
1553 use->iv = iv;
1554 use->stmt = stmt;
1555 use->op_p = use_p;
1556 use->addr_base = addr_base;
1557 use->addr_offset = addr_offset;
1559 group->vuses.safe_push (use);
1560 return use;
1563 /* Checks whether OP is a loop-level invariant and if so, records it.
1564 NONLINEAR_USE is true if the invariant is used in a way we do not
1565 handle specially. */
1567 static void
1568 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1570 basic_block bb;
1571 struct version_info *info;
1573 if (TREE_CODE (op) != SSA_NAME
1574 || virtual_operand_p (op))
1575 return;
1577 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1578 if (bb
1579 && flow_bb_inside_loop_p (data->current_loop, bb))
1580 return;
1582 info = name_info (data, op);
1583 info->name = op;
1584 info->has_nonlin_use |= nonlinear_use;
1585 if (!info->inv_id)
1586 info->inv_id = ++data->max_inv_var_id;
1587 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1590 /* Record a group of TYPE. */
1592 static struct iv_group *
1593 record_group (struct ivopts_data *data, enum use_type type)
1595 struct iv_group *group = XCNEW (struct iv_group);
1597 group->id = data->vgroups.length ();
1598 group->type = type;
1599 group->related_cands = BITMAP_ALLOC (NULL);
1600 group->vuses.create (1);
1602 data->vgroups.safe_push (group);
1603 return group;
1606 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1607 New group will be created if there is no existing group for the use.
1608 MEM_TYPE is the type of memory being addressed, or NULL if this
1609 isn't an address reference. */
1611 static struct iv_use *
1612 record_group_use (struct ivopts_data *data, tree *use_p,
1613 struct iv *iv, gimple *stmt, enum use_type type,
1614 tree mem_type)
1616 tree addr_base = NULL;
1617 struct iv_group *group = NULL;
1618 poly_uint64 addr_offset = 0;
 1620   /* Address type uses may join an existing group; other uses start a new group.  */
1621 if (address_p (type))
1623 unsigned int i;
1625 addr_base = strip_offset (iv->base, &addr_offset);
1626 for (i = 0; i < data->vgroups.length (); i++)
1628 struct iv_use *use;
1630 group = data->vgroups[i];
1631 use = group->vuses[0];
1632 if (!address_p (use->type))
1633 continue;
1635 /* Check if it has the same stripped base and step. */
1636 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1637 && operand_equal_p (iv->step, use->iv->step, 0)
1638 && operand_equal_p (addr_base, use->addr_base, 0))
1639 break;
1641 if (i == data->vgroups.length ())
1642 group = NULL;
1645 if (!group)
1646 group = record_group (data, type);
1648 return record_use (group, use_p, iv, stmt, type, mem_type,
1649 addr_base, addr_offset);
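/* For illustration (names hypothetical): two address uses based on ivs
   with bases "p_1" and "p_1 + 4" and the same step share the stripped
   base p_1, so they end up in one group with addr_offsets 0 and 4.
   Non-address uses always start a new group.  */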
1652 /* Checks whether the use OP is interesting and if so, records it. */
1654 static struct iv_use *
1655 find_interesting_uses_op (struct ivopts_data *data, tree op)
1657 struct iv *iv;
1658 gimple *stmt;
1659 struct iv_use *use;
1661 if (TREE_CODE (op) != SSA_NAME)
1662 return NULL;
1664 iv = get_iv (data, op);
1665 if (!iv)
1666 return NULL;
1668 if (iv->nonlin_use)
1670 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1671 return iv->nonlin_use;
1674 if (integer_zerop (iv->step))
1676 record_invariant (data, op, true);
1677 return NULL;
1680 stmt = SSA_NAME_DEF_STMT (op);
1681 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1683 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1684 iv->nonlin_use = use;
1685 return use;
1688 /* Indicate how compare type iv_use can be handled. */
1689 enum comp_iv_rewrite
1691 COMP_IV_NA,
1692 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1693 COMP_IV_EXPR,
1694 /* We may rewrite compare type iv_uses on both sides of comparison by
1695 expressing value of each iv_use. */
1696 COMP_IV_EXPR_2,
1697 /* We may rewrite compare type iv_use by expressing value of the iv_use
1698 or by eliminating it with other iv_cand. */
1699 COMP_IV_ELIM
1702 /* Given a condition in statement STMT, checks whether it is a compare
1703 of an induction variable and an invariant. If this is the case,
1704 CONTROL_VAR is set to location of the iv, BOUND to the location of
1705 the invariant, IV_VAR and IV_BOUND are set to the corresponding
 1706    induction variable descriptions, and a comp_iv_rewrite code describing
 1707    how the condition may be handled is returned.  If this is not the case,
 1708    CONTROL_VAR and BOUND are set to the arguments of the condition and
    COMP_IV_NA is returned.  */
1710 static enum comp_iv_rewrite
1711 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1712 tree **control_var, tree **bound,
1713 struct iv **iv_var, struct iv **iv_bound)
1715 /* The objects returned when COND has constant operands. */
1716 static struct iv const_iv;
1717 static tree zero;
1718 tree *op0 = &zero, *op1 = &zero;
1719 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1720 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1722 if (gimple_code (stmt) == GIMPLE_COND)
1724 gcond *cond_stmt = as_a <gcond *> (stmt);
1725 op0 = gimple_cond_lhs_ptr (cond_stmt);
1726 op1 = gimple_cond_rhs_ptr (cond_stmt);
1728 else
1730 op0 = gimple_assign_rhs1_ptr (stmt);
1731 op1 = gimple_assign_rhs2_ptr (stmt);
1734 zero = integer_zero_node;
1735 const_iv.step = integer_zero_node;
1737 if (TREE_CODE (*op0) == SSA_NAME)
1738 iv0 = get_iv (data, *op0);
1739 if (TREE_CODE (*op1) == SSA_NAME)
1740 iv1 = get_iv (data, *op1);
1742 /* If both sides of comparison are IVs. We can express ivs on both end. */
1743 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1745 rewrite_type = COMP_IV_EXPR_2;
1746 goto end;
 1749   /* If neither side of the comparison is an IV.  */
1750 if ((!iv0 || integer_zerop (iv0->step))
1751 && (!iv1 || integer_zerop (iv1->step)))
1752 goto end;
1754 /* Control variable may be on the other side. */
1755 if (!iv0 || integer_zerop (iv0->step))
1757 std::swap (op0, op1);
1758 std::swap (iv0, iv1);
1760 /* If one side is IV and the other side isn't loop invariant. */
1761 if (!iv1)
1762 rewrite_type = COMP_IV_EXPR;
1763 /* If one side is IV and the other side is loop invariant. */
1764 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1765 rewrite_type = COMP_IV_ELIM;
1767 end:
1768 if (control_var)
1769 *control_var = op0;
1770 if (iv_var)
1771 *iv_var = iv0;
1772 if (bound)
1773 *bound = op1;
1774 if (iv_bound)
1775 *iv_bound = iv1;
1777 return rewrite_type;
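/* For illustration (names hypothetical): for "i_1 < n_2" where i_1 is an
   iv with nonzero step and n_2 is loop invariant, the result is
   COMP_IV_ELIM with *CONTROL_VAR pointing at i_1 and *BOUND at n_2; for
   "i_1 < j_3" with both operands ivs the result is COMP_IV_EXPR_2.  */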
1780 /* Checks whether the condition in STMT is interesting and if so,
1781 records it. */
1783 static void
1784 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1786 tree *var_p, *bound_p;
1787 struct iv *var_iv, *bound_iv;
1788 enum comp_iv_rewrite ret;
1790 ret = extract_cond_operands (data, stmt,
1791 &var_p, &bound_p, &var_iv, &bound_iv);
1792 if (ret == COMP_IV_NA)
1794 find_interesting_uses_op (data, *var_p);
1795 find_interesting_uses_op (data, *bound_p);
1796 return;
1799 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1800 /* Record compare type iv_use for iv on the other side of comparison. */
1801 if (ret == COMP_IV_EXPR_2)
1802 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1805 /* Returns the outermost loop EXPR is obviously invariant in
1806 relative to the loop LOOP, i.e. if all its operands are defined
1807 outside of the returned loop. Returns NULL if EXPR is not
1808 even obviously invariant in LOOP. */
1810 struct loop *
1811 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1813 basic_block def_bb;
1814 unsigned i, len;
1816 if (is_gimple_min_invariant (expr))
1817 return current_loops->tree_root;
1819 if (TREE_CODE (expr) == SSA_NAME)
1821 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1822 if (def_bb)
1824 if (flow_bb_inside_loop_p (loop, def_bb))
1825 return NULL;
1826 return superloop_at_depth (loop,
1827 loop_depth (def_bb->loop_father) + 1);
1830 return current_loops->tree_root;
1833 if (!EXPR_P (expr))
1834 return NULL;
1836 unsigned maxdepth = 0;
1837 len = TREE_OPERAND_LENGTH (expr);
1838 for (i = 0; i < len; i++)
1840 struct loop *ivloop;
1841 if (!TREE_OPERAND (expr, i))
1842 continue;
1844 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1845 if (!ivloop)
1846 return NULL;
1847 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1850 return superloop_at_depth (loop, maxdepth);
1853 /* Returns true if expression EXPR is obviously invariant in LOOP,
1854 i.e. if all its operands are defined outside of the LOOP. LOOP
1855 should not be the function body. */
1857 bool
1858 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1860 basic_block def_bb;
1861 unsigned i, len;
1863 gcc_assert (loop_depth (loop) > 0);
1865 if (is_gimple_min_invariant (expr))
1866 return true;
1868 if (TREE_CODE (expr) == SSA_NAME)
1870 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1871 if (def_bb
1872 && flow_bb_inside_loop_p (loop, def_bb))
1873 return false;
1875 return true;
1878 if (!EXPR_P (expr))
1879 return false;
1881 len = TREE_OPERAND_LENGTH (expr);
1882 for (i = 0; i < len; i++)
1883 if (TREE_OPERAND (expr, i)
1884 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1885 return false;
1887 return true;
1890 /* Given expression EXPR which computes inductive values with respect
1891 to loop recorded in DATA, this function returns biv from which EXPR
1892 is derived by tracing definition chains of ssa variables in EXPR. */
1894 static struct iv*
1895 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1897 struct iv *iv;
1898 unsigned i, n;
1899 tree e2, e1;
1900 enum tree_code code;
1901 gimple *stmt;
1903 if (expr == NULL_TREE)
1904 return NULL;
1906 if (is_gimple_min_invariant (expr))
1907 return NULL;
1909 code = TREE_CODE (expr);
1910 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1912 n = TREE_OPERAND_LENGTH (expr);
1913 for (i = 0; i < n; i++)
1915 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1916 if (iv)
1917 return iv;
1921 /* Stop if it's not ssa name. */
1922 if (code != SSA_NAME)
1923 return NULL;
1925 iv = get_iv (data, expr);
1926 if (!iv || integer_zerop (iv->step))
1927 return NULL;
1928 else if (iv->biv_p)
1929 return iv;
1931 stmt = SSA_NAME_DEF_STMT (expr);
1932 if (gphi *phi = dyn_cast <gphi *> (stmt))
1934 ssa_op_iter iter;
1935 use_operand_p use_p;
1936 basic_block phi_bb = gimple_bb (phi);
1938 /* Skip loop header PHI that doesn't define biv. */
1939 if (phi_bb->loop_father == data->current_loop)
1940 return NULL;
1942 if (virtual_operand_p (gimple_phi_result (phi)))
1943 return NULL;
1945 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1947 tree use = USE_FROM_PTR (use_p);
1948 iv = find_deriving_biv_for_expr (data, use);
1949 if (iv)
1950 return iv;
1952 return NULL;
1954 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1955 return NULL;
1957 e1 = gimple_assign_rhs1 (stmt);
1958 code = gimple_assign_rhs_code (stmt);
1959 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1960 return find_deriving_biv_for_expr (data, e1);
1962 switch (code)
1964 case MULT_EXPR:
1965 case PLUS_EXPR:
1966 case MINUS_EXPR:
1967 case POINTER_PLUS_EXPR:
1968 /* Increments, decrements and multiplications by a constant
1969 are simple. */
1970 e2 = gimple_assign_rhs2 (stmt);
1971 iv = find_deriving_biv_for_expr (data, e2);
1972 if (iv)
1973 return iv;
1974 gcc_fallthrough ();
1976 CASE_CONVERT:
1977 /* Casts are simple. */
1978 return find_deriving_biv_for_expr (data, e1);
1980 default:
1981 break;
1984 return NULL;
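/* For illustration (names hypothetical): if i_1 is a biv and j_4 is
   defined by "j_4 = i_1 * 4", then find_deriving_biv_for_expr on j_4
   traces the definition of j_4 and returns the biv i_1.  */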
1987 /* Record BIV, its predecessor and successor that they are used in
1988 address type uses. */
1990 static void
1991 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1993 unsigned i;
1994 tree type, base_1, base_2;
1995 bitmap_iterator bi;
1997 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1998 || biv->have_address_use || !biv->no_overflow)
1999 return;
2001 type = TREE_TYPE (biv->base);
2002 if (!INTEGRAL_TYPE_P (type))
2003 return;
2005 biv->have_address_use = true;
2006 data->bivs_not_used_in_addr--;
2007 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
2008 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2010 struct iv *iv = ver_info (data, i)->iv;
2012 if (!iv || !iv->biv_p || integer_zerop (iv->step)
2013 || iv->have_address_use || !iv->no_overflow)
2014 continue;
2016 if (type != TREE_TYPE (iv->base)
2017 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2018 continue;
2020 if (!operand_equal_p (biv->step, iv->step, 0))
2021 continue;
2023 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2024 if (operand_equal_p (base_1, iv->base, 0)
2025 || operand_equal_p (base_2, biv->base, 0))
2027 iv->have_address_use = true;
2028 data->bivs_not_used_in_addr--;
2033 /* Cumulates the steps of indices into DATA and replaces their values with the
2034 initial ones. Returns false when the value of the index cannot be determined.
2035 Callback for for_each_index. */
2037 struct ifs_ivopts_data
2039 struct ivopts_data *ivopts_data;
2040 gimple *stmt;
2041 tree step;
2044 static bool
2045 idx_find_step (tree base, tree *idx, void *data)
2047 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2048 struct iv *iv;
2049 bool use_overflow_semantics = false;
2050 tree step, iv_base, iv_step, lbound, off;
2051 struct loop *loop = dta->ivopts_data->current_loop;
2053 /* If base is a component ref, require that the offset of the reference
2054 be invariant. */
2055 if (TREE_CODE (base) == COMPONENT_REF)
2057 off = component_ref_field_offset (base);
2058 return expr_invariant_in_loop_p (loop, off);
2061 /* If base is array, first check whether we will be able to move the
2062 reference out of the loop (in order to take its address in strength
2063 reduction). In order for this to work we need both lower bound
2064 and step to be loop invariants. */
2065 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2067 /* Moreover, for a range, the size needs to be invariant as well. */
2068 if (TREE_CODE (base) == ARRAY_RANGE_REF
2069 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2070 return false;
2072 step = array_ref_element_size (base);
2073 lbound = array_ref_low_bound (base);
2075 if (!expr_invariant_in_loop_p (loop, step)
2076 || !expr_invariant_in_loop_p (loop, lbound))
2077 return false;
2080 if (TREE_CODE (*idx) != SSA_NAME)
2081 return true;
2083 iv = get_iv (dta->ivopts_data, *idx);
2084 if (!iv)
2085 return false;
2087 /* XXX For a base of *D42 whose iv->base is &x[0], we produce
2088 *&x[0], which is not folded and does not trigger the
2089 ARRAY_REF path below. */
2090 *idx = iv->base;
2092 if (integer_zerop (iv->step))
2093 return true;
2095 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2097 step = array_ref_element_size (base);
2099 /* We only handle addresses whose step is an integer constant. */
2100 if (TREE_CODE (step) != INTEGER_CST)
2101 return false;
2103 else
2104 /* The step for pointer arithmetic is already 1 byte. */
2105 step = size_one_node;
2107 iv_base = iv->base;
2108 iv_step = iv->step;
2109 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2110 use_overflow_semantics = true;
2112 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2113 sizetype, &iv_base, &iv_step, dta->stmt,
2114 use_overflow_semantics))
2116 /* The index might wrap. */
2117 return false;
2120 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2121 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2123 if (dta->ivopts_data->bivs_not_used_in_addr)
2125 if (!iv->biv_p)
2126 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2128 record_biv_for_address_use (dta->ivopts_data, iv);
2130 return true;
2133 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2134 object is passed to it in DATA. */
2136 static bool
2137 idx_record_use (tree base, tree *idx,
2138 void *vdata)
2140 struct ivopts_data *data = (struct ivopts_data *) vdata;
2141 find_interesting_uses_op (data, *idx);
2142 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2144 find_interesting_uses_op (data, array_ref_element_size (base));
2145 find_interesting_uses_op (data, array_ref_low_bound (base));
2147 return true;
2150 /* If we can prove that TOP = cst * BOT for some constant cst,
2151 store cst to MUL and return true. Otherwise return false.
2152 The returned value is always sign-extended, regardless of the
2153 signedness of TOP and BOT. */
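/* For example, with TOP = i_1 * 12 and BOT = i_1 this stores 12 to *MUL;
   with TOP = 24 and BOT = 8 it stores 3 and returns true, while
   TOP = 25 and BOT = 8 makes it return false.  */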
2155 static bool
2156 constant_multiple_of (tree top, tree bot, widest_int *mul)
2158 tree mby;
2159 enum tree_code code;
2160 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2161 widest_int res, p0, p1;
2163 STRIP_NOPS (top);
2164 STRIP_NOPS (bot);
2166 if (operand_equal_p (top, bot, 0))
2168 *mul = 1;
2169 return true;
2172 code = TREE_CODE (top);
2173 switch (code)
2175 case MULT_EXPR:
2176 mby = TREE_OPERAND (top, 1);
2177 if (TREE_CODE (mby) != INTEGER_CST)
2178 return false;
2180 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2181 return false;
2183 *mul = wi::sext (res * wi::to_widest (mby), precision);
2184 return true;
2186 case PLUS_EXPR:
2187 case MINUS_EXPR:
2188 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2189 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2190 return false;
2192 if (code == MINUS_EXPR)
2193 p1 = -p1;
2194 *mul = wi::sext (p0 + p1, precision);
2195 return true;
2197 case INTEGER_CST:
2198 if (TREE_CODE (bot) != INTEGER_CST)
2199 return false;
2201 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2202 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2203 if (p1 == 0)
2204 return false;
2205 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2206 return res == 0;
2208 default:
2209 if (POLY_INT_CST_P (top)
2210 && POLY_INT_CST_P (bot)
2211 && constant_multiple_p (wi::to_poly_widest (top),
2212 wi::to_poly_widest (bot), mul))
2213 return true;
2215 return false;
2219 /* Return true if memory reference REF with step STEP may be unaligned. */
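/* For example, a 4-byte-aligned access whose address advances by STEP = 2
   bytes each iteration may become misaligned after one iteration, so the
   check below returns true for such a reference.  */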
2221 static bool
2222 may_be_unaligned_p (tree ref, tree step)
2224 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2225 thus they are not misaligned. */
2226 if (TREE_CODE (ref) == TARGET_MEM_REF)
2227 return false;
2229 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2230 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2231 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2233 unsigned HOST_WIDE_INT bitpos;
2234 unsigned int ref_align;
2235 get_object_alignment_1 (ref, &ref_align, &bitpos);
2236 if (ref_align < align
2237 || (bitpos % align) != 0
2238 || (bitpos % BITS_PER_UNIT) != 0)
2239 return true;
2241 unsigned int trailing_zeros = tree_ctz (step);
2242 if (trailing_zeros < HOST_BITS_PER_INT
2243 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2244 return true;
2246 return false;
2249 /* Return true if EXPR may be non-addressable. */
2251 bool
2252 may_be_nonaddressable_p (tree expr)
2254 switch (TREE_CODE (expr))
2256 case VAR_DECL:
2257 /* Check if it's a register variable. */
2258 return DECL_HARD_REGISTER (expr);
2260 case TARGET_MEM_REF:
2261 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2262 target, thus they are always addressable. */
2263 return false;
2265 case MEM_REF:
2266 /* Likewise for MEM_REFs, modulo the storage order. */
2267 return REF_REVERSE_STORAGE_ORDER (expr);
2269 case BIT_FIELD_REF:
2270 if (REF_REVERSE_STORAGE_ORDER (expr))
2271 return true;
2272 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2274 case COMPONENT_REF:
2275 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2276 return true;
2277 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2278 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2280 case ARRAY_REF:
2281 case ARRAY_RANGE_REF:
2282 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2283 return true;
2284 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2286 case VIEW_CONVERT_EXPR:
2287 /* This kind of view-conversion may wrap non-addressable objects
2288 and make them look addressable. After some processing the
2289 non-addressability may be uncovered again, causing ADDR_EXPRs
2290 of inappropriate objects to be built. */
2291 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2292 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2293 return true;
2294 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2296 CASE_CONVERT:
2297 return true;
2299 default:
2300 break;
2303 return false;
2306 /* Finds addresses in *OP_P inside STMT. */
2308 static void
2309 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2310 tree *op_p)
2312 tree base = *op_p, step = size_zero_node;
2313 struct iv *civ;
2314 struct ifs_ivopts_data ifs_ivopts_data;
2316 /* Do not play with volatile memory references. A bit too conservative,
2317 perhaps, but safe. */
2318 if (gimple_has_volatile_ops (stmt))
2319 goto fail;
2321 /* Ignore bitfields for now. Not really something terribly complicated
2322 to handle. TODO. */
2323 if (TREE_CODE (base) == BIT_FIELD_REF)
2324 goto fail;
2326 base = unshare_expr (base);
2328 if (TREE_CODE (base) == TARGET_MEM_REF)
2330 tree type = build_pointer_type (TREE_TYPE (base));
2331 tree astep;
2333 if (TMR_BASE (base)
2334 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2336 civ = get_iv (data, TMR_BASE (base));
2337 if (!civ)
2338 goto fail;
2340 TMR_BASE (base) = civ->base;
2341 step = civ->step;
2343 if (TMR_INDEX2 (base)
2344 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2346 civ = get_iv (data, TMR_INDEX2 (base));
2347 if (!civ)
2348 goto fail;
2350 TMR_INDEX2 (base) = civ->base;
2351 step = civ->step;
2353 if (TMR_INDEX (base)
2354 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2356 civ = get_iv (data, TMR_INDEX (base));
2357 if (!civ)
2358 goto fail;
2360 TMR_INDEX (base) = civ->base;
2361 astep = civ->step;
2363 if (astep)
2365 if (TMR_STEP (base))
2366 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2368 step = fold_build2 (PLUS_EXPR, type, step, astep);
2372 if (integer_zerop (step))
2373 goto fail;
2374 base = tree_mem_ref_addr (type, base);
2376 else
2378 ifs_ivopts_data.ivopts_data = data;
2379 ifs_ivopts_data.stmt = stmt;
2380 ifs_ivopts_data.step = size_zero_node;
2381 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2382 || integer_zerop (ifs_ivopts_data.step))
2383 goto fail;
2384 step = ifs_ivopts_data.step;
2386 /* Check that the base expression is addressable. This needs
2387 to be done after substituting bases of IVs into it. */
2388 if (may_be_nonaddressable_p (base))
2389 goto fail;
2391 /* Moreover, on strict alignment platforms, check that it is
2392 sufficiently aligned. */
2393 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2394 goto fail;
2396 base = build_fold_addr_expr (base);
2398 /* Substituting bases of IVs into the base expression might
2399 have caused folding opportunities. */
2400 if (TREE_CODE (base) == ADDR_EXPR)
2402 tree *ref = &TREE_OPERAND (base, 0);
2403 while (handled_component_p (*ref))
2404 ref = &TREE_OPERAND (*ref, 0);
2405 if (TREE_CODE (*ref) == MEM_REF)
2407 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2408 TREE_OPERAND (*ref, 0),
2409 TREE_OPERAND (*ref, 1));
2410 if (tem)
2411 *ref = tem;
2416 civ = alloc_iv (data, base, step);
2417 /* Fail if base object of this memory reference is unknown. */
2418 if (civ->base_object == NULL_TREE)
2419 goto fail;
2421 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2422 return;
2424 fail:
2425 for_each_index (op_p, idx_record_use, data);
2428 /* Finds and records invariants used in STMT. */
2430 static void
2431 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2433 ssa_op_iter iter;
2434 use_operand_p use_p;
2435 tree op;
2437 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2439 op = USE_FROM_PTR (use_p);
2440 record_invariant (data, op, false);
2444 /* CALL calls an internal function. If operand *OP_P will become an
2445 address when the call is expanded, return the type of the memory
2446 being addressed, otherwise return null. */
2448 static tree
2449 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2451 switch (gimple_call_internal_fn (call))
2453 case IFN_MASK_LOAD:
2454 if (op_p == gimple_call_arg_ptr (call, 0))
2455 return TREE_TYPE (gimple_call_lhs (call));
2456 return NULL_TREE;
2458 case IFN_MASK_STORE:
2459 if (op_p == gimple_call_arg_ptr (call, 0))
2460 return TREE_TYPE (gimple_call_arg (call, 3));
2461 return NULL_TREE;
2463 default:
2464 return NULL_TREE;
2468 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2469 Return true if the operand will become an address when STMT
2470 is expanded and record the associated address use if so. */
2472 static bool
2473 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2474 struct iv *iv)
2476 /* Fail if base object of this memory reference is unknown. */
2477 if (iv->base_object == NULL_TREE)
2478 return false;
2480 tree mem_type = NULL_TREE;
2481 if (gcall *call = dyn_cast <gcall *> (stmt))
2482 if (gimple_call_internal_p (call))
2483 mem_type = get_mem_type_for_internal_fn (call, op_p);
2484 if (mem_type)
2486 iv = alloc_iv (data, iv->base, iv->step);
2487 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2488 return true;
2490 return false;
2493 /* Finds interesting uses of induction variables in the statement STMT. */
2495 static void
2496 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2498 struct iv *iv;
2499 tree op, *lhs, *rhs;
2500 ssa_op_iter iter;
2501 use_operand_p use_p;
2502 enum tree_code code;
2504 find_invariants_stmt (data, stmt);
2506 if (gimple_code (stmt) == GIMPLE_COND)
2508 find_interesting_uses_cond (data, stmt);
2509 return;
2512 if (is_gimple_assign (stmt))
2514 lhs = gimple_assign_lhs_ptr (stmt);
2515 rhs = gimple_assign_rhs1_ptr (stmt);
2517 if (TREE_CODE (*lhs) == SSA_NAME)
2519 /* If the statement defines an induction variable, the uses are not
2520 interesting by themselves. */
2522 iv = get_iv (data, *lhs);
2524 if (iv && !integer_zerop (iv->step))
2525 return;
2528 code = gimple_assign_rhs_code (stmt);
2529 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2530 && (REFERENCE_CLASS_P (*rhs)
2531 || is_gimple_val (*rhs)))
2533 if (REFERENCE_CLASS_P (*rhs))
2534 find_interesting_uses_address (data, stmt, rhs);
2535 else
2536 find_interesting_uses_op (data, *rhs);
2538 if (REFERENCE_CLASS_P (*lhs))
2539 find_interesting_uses_address (data, stmt, lhs);
2540 return;
2542 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2544 find_interesting_uses_cond (data, stmt);
2545 return;
2548 /* TODO -- we should also handle address uses of type
2550 memory = call (whatever);
2554 call (memory). */
2557 if (gimple_code (stmt) == GIMPLE_PHI
2558 && gimple_bb (stmt) == data->current_loop->header)
2560 iv = get_iv (data, PHI_RESULT (stmt));
2562 if (iv && !integer_zerop (iv->step))
2563 return;
2566 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2568 op = USE_FROM_PTR (use_p);
2570 if (TREE_CODE (op) != SSA_NAME)
2571 continue;
2573 iv = get_iv (data, op);
2574 if (!iv)
2575 continue;
2577 if (!find_address_like_use (data, stmt, use_p->use, iv))
2578 find_interesting_uses_op (data, op);
2582 /* Finds interesting uses of induction variables outside of loops
2583 on loop exit edge EXIT. */
2585 static void
2586 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2588 gphi *phi;
2589 gphi_iterator psi;
2590 tree def;
2592 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2594 phi = psi.phi ();
2595 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2596 if (!virtual_operand_p (def))
2597 find_interesting_uses_op (data, def);
2601 /* Return TRUE if OFFSET is within the range of the [base + offset] addressing
2602 mode for the memory reference represented by USE. */
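/* For instance, on a hypothetical target whose [base + offset] addressing
   mode only accepts 12-bit signed offsets, an OFFSET of 4096 would make
   this predicate return false, while an OFFSET of 8 would be accepted.  */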
2604 static GTY (()) vec<rtx, va_gc> *addr_list;
2606 static bool
2607 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2609 rtx reg, addr;
2610 unsigned list_index;
2611 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2612 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2614 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2615 if (list_index >= vec_safe_length (addr_list))
2616 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2618 addr = (*addr_list)[list_index];
2619 if (!addr)
2621 addr_mode = targetm.addr_space.address_mode (as);
2622 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2623 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2624 (*addr_list)[list_index] = addr;
2626 else
2627 addr_mode = GET_MODE (addr);
2629 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2630 return (memory_address_addr_space_p (mem_mode, addr, as));
2633 /* Comparison function to sort a group's uses in ascending order of addr_offset. */
2635 static int
2636 group_compare_offset (const void *a, const void *b)
2638 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2639 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2641 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2644 /* Check if small groups should be split. Return true if no group
2645 contains more than two uses with distinct addr_offsets. Return
2646 false otherwise. We want to split such groups because:
2648 1) Small groups don't have much benefit and may interfere with
2649 general candidate selection.
2650 2) The problem size with only small groups is usually small, and the
2651 general algorithm can handle it well.
2653 TODO -- The above claim may not hold when we want to merge memory
2654 accesses with consecutive addresses. */
2656 static bool
2657 split_small_address_groups_p (struct ivopts_data *data)
2659 unsigned int i, j, distinct = 1;
2660 struct iv_use *pre;
2661 struct iv_group *group;
2663 for (i = 0; i < data->vgroups.length (); i++)
2665 group = data->vgroups[i];
2666 if (group->vuses.length () == 1)
2667 continue;
2669 gcc_assert (address_p (group->type));
2670 if (group->vuses.length () == 2)
2672 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2673 group->vuses[1]->addr_offset) > 0)
2674 std::swap (group->vuses[0], group->vuses[1]);
2676 else
2677 group->vuses.qsort (group_compare_offset);
2679 if (distinct > 2)
2680 continue;
2682 distinct = 1;
2683 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2685 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2687 pre = group->vuses[j];
2688 distinct++;
2691 if (distinct > 2)
2692 break;
2696 return (distinct <= 2);
2699 /* For each group of address type uses, this function further groups
2700 these uses according to the maximum offset supported by the target's
2701 [base + offset] addressing mode. */
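/* As a sketch of the effect, assuming a target whose offset range is
   [-32768, 32767]: uses at offsets 0, 4 and 400000 from the same base end
   up in two groups, {0, 4} and {400000}, because the last offset cannot be
   encoded relative to the first use.  */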
2703 static void
2704 split_address_groups (struct ivopts_data *data)
2706 unsigned int i, j;
2707 /* Whether to always split a group. */
2708 bool split_p = split_small_address_groups_p (data);
2710 for (i = 0; i < data->vgroups.length (); i++)
2712 struct iv_group *new_group = NULL;
2713 struct iv_group *group = data->vgroups[i];
2714 struct iv_use *use = group->vuses[0];
2716 use->id = 0;
2717 use->group_id = group->id;
2718 if (group->vuses.length () == 1)
2719 continue;
2721 gcc_assert (address_p (use->type));
2723 for (j = 1; j < group->vuses.length ();)
2725 struct iv_use *next = group->vuses[j];
2726 poly_int64 offset = next->addr_offset - use->addr_offset;
2728 /* Split the group if asked to, or if the offset against the first
2729 use can't fit in the offset part of the addressing mode. IV uses
2730 having the same offset are still kept in one group. */
2731 if (maybe_ne (offset, 0)
2732 && (split_p || !addr_offset_valid_p (use, offset)))
2734 if (!new_group)
2735 new_group = record_group (data, group->type);
2736 group->vuses.ordered_remove (j);
2737 new_group->vuses.safe_push (next);
2738 continue;
2741 next->id = j;
2742 next->group_id = group->id;
2743 j++;
2748 /* Finds uses of the induction variables that are interesting. */
2750 static void
2751 find_interesting_uses (struct ivopts_data *data)
2753 basic_block bb;
2754 gimple_stmt_iterator bsi;
2755 basic_block *body = get_loop_body (data->current_loop);
2756 unsigned i;
2757 edge e;
2759 for (i = 0; i < data->current_loop->num_nodes; i++)
2761 edge_iterator ei;
2762 bb = body[i];
2764 FOR_EACH_EDGE (e, ei, bb->succs)
2765 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2766 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2767 find_interesting_uses_outside (data, e);
2769 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2770 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2771 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2772 if (!is_gimple_debug (gsi_stmt (bsi)))
2773 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2775 free (body);
2777 split_address_groups (data);
2779 if (dump_file && (dump_flags & TDF_DETAILS))
2781 fprintf (dump_file, "\n<IV Groups>:\n");
2782 dump_groups (dump_file, data);
2783 fprintf (dump_file, "\n");
2787 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2788 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2789 we are at the top-level of the processed address. */
2791 static tree
2792 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2793 poly_int64 *offset)
2795 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2796 enum tree_code code;
2797 tree type, orig_type = TREE_TYPE (expr);
2798 poly_int64 off0, off1;
2799 HOST_WIDE_INT st;
2800 tree orig_expr = expr;
2802 STRIP_NOPS (expr);
2804 type = TREE_TYPE (expr);
2805 code = TREE_CODE (expr);
2806 *offset = 0;
2808 switch (code)
2810 case POINTER_PLUS_EXPR:
2811 case PLUS_EXPR:
2812 case MINUS_EXPR:
2813 op0 = TREE_OPERAND (expr, 0);
2814 op1 = TREE_OPERAND (expr, 1);
2816 op0 = strip_offset_1 (op0, false, false, &off0);
2817 op1 = strip_offset_1 (op1, false, false, &off1);
2819 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2820 if (op0 == TREE_OPERAND (expr, 0)
2821 && op1 == TREE_OPERAND (expr, 1))
2822 return orig_expr;
2824 if (integer_zerop (op1))
2825 expr = op0;
2826 else if (integer_zerop (op0))
2828 if (code == MINUS_EXPR)
2829 expr = fold_build1 (NEGATE_EXPR, type, op1);
2830 else
2831 expr = op1;
2833 else
2834 expr = fold_build2 (code, type, op0, op1);
2836 return fold_convert (orig_type, expr);
2838 case MULT_EXPR:
2839 op1 = TREE_OPERAND (expr, 1);
2840 if (!cst_and_fits_in_hwi (op1))
2841 return orig_expr;
2843 op0 = TREE_OPERAND (expr, 0);
2844 op0 = strip_offset_1 (op0, false, false, &off0);
2845 if (op0 == TREE_OPERAND (expr, 0))
2846 return orig_expr;
2848 *offset = off0 * int_cst_value (op1);
2849 if (integer_zerop (op0))
2850 expr = op0;
2851 else
2852 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2854 return fold_convert (orig_type, expr);
2856 case ARRAY_REF:
2857 case ARRAY_RANGE_REF:
2858 if (!inside_addr)
2859 return orig_expr;
2861 step = array_ref_element_size (expr);
2862 if (!cst_and_fits_in_hwi (step))
2863 break;
2865 st = int_cst_value (step);
2866 op1 = TREE_OPERAND (expr, 1);
2867 op1 = strip_offset_1 (op1, false, false, &off1);
2868 *offset = off1 * st;
2870 if (top_compref
2871 && integer_zerop (op1))
2873 /* Strip the component reference completely. */
2874 op0 = TREE_OPERAND (expr, 0);
2875 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2876 *offset += off0;
2877 return op0;
2879 break;
2881 case COMPONENT_REF:
2883 tree field;
2885 if (!inside_addr)
2886 return orig_expr;
2888 tmp = component_ref_field_offset (expr);
2889 field = TREE_OPERAND (expr, 1);
2890 if (top_compref
2891 && cst_and_fits_in_hwi (tmp)
2892 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2894 HOST_WIDE_INT boffset, abs_off;
2896 /* Strip the component reference completely. */
2897 op0 = TREE_OPERAND (expr, 0);
2898 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2899 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2900 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2901 if (boffset < 0)
2902 abs_off = -abs_off;
2904 *offset = off0 + int_cst_value (tmp) + abs_off;
2905 return op0;
2908 break;
2910 case ADDR_EXPR:
2911 op0 = TREE_OPERAND (expr, 0);
2912 op0 = strip_offset_1 (op0, true, true, &off0);
2913 *offset += off0;
2915 if (op0 == TREE_OPERAND (expr, 0))
2916 return orig_expr;
2918 expr = build_fold_addr_expr (op0);
2919 return fold_convert (orig_type, expr);
2921 case MEM_REF:
2922 /* ??? Offset operand? */
2923 inside_addr = false;
2924 break;
2926 default:
2927 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2928 return build_int_cst (orig_type, 0);
2929 return orig_expr;
2932 /* Default handling of expressions for which we want to recurse into
2933 the first operand. */
2934 op0 = TREE_OPERAND (expr, 0);
2935 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2936 *offset += off0;
2938 if (op0 == TREE_OPERAND (expr, 0)
2939 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2940 return orig_expr;
2942 expr = copy_node (expr);
2943 TREE_OPERAND (expr, 0) = op0;
2944 if (op1)
2945 TREE_OPERAND (expr, 1) = op1;
2947 /* Inside address, we might strip the top level component references,
2948 thus changing type of the expression. Handling of ADDR_EXPR
2949 will fix that. */
2950 expr = fold_convert (orig_type, expr);
2952 return expr;
2955 /* Strips constant offsets from EXPR and stores them to OFFSET. */
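/* For example, for EXPR = p_1 + 16 (a POINTER_PLUS_EXPR) this returns p_1
   and sets *OFFSET to 16; for EXPR = &a[3] with 4-byte array elements it
   returns the address of a and sets *OFFSET to 12.  */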
2957 tree
2958 strip_offset (tree expr, poly_uint64_pod *offset)
2960 poly_int64 off;
2961 tree core = strip_offset_1 (expr, false, false, &off);
2962 *offset = off;
2963 return core;
2966 /* Returns a variant of TYPE that can be used as a base for different uses.
2967 We return an unsigned type with the same precision, which avoids problems
2968 with overflows. */
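/* E.g. 'int' is mapped to 'unsigned int', a pointer type is mapped to an
   unsigned integer type of the same precision, and a type that is already
   unsigned is returned unchanged.  */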
2970 static tree
2971 generic_type_for (tree type)
2973 if (POINTER_TYPE_P (type))
2974 return unsigned_type_for (type);
2976 if (TYPE_UNSIGNED (type))
2977 return type;
2979 return unsigned_type_for (type);
2982 /* Private data for walk_tree. */
2984 struct walk_tree_data
2986 bitmap *inv_vars;
2987 struct ivopts_data *idata;
2990 /* Callback function for walk_tree; it records invariants and symbol
2991 references in *EXPR_P. DATA is the structure storing result info. */
2993 static tree
2994 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2996 tree op = *expr_p;
2997 struct version_info *info;
2998 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
3000 if (TREE_CODE (op) != SSA_NAME)
3001 return NULL_TREE;
3003 info = name_info (wdata->idata, op);
3004 /* Because we expand simple operations when finding IVs, a loop invariant
3005 variable that isn't referred to by the original loop could be used now.
3006 Record such invariant variables here. */
3007 if (!info->iv)
3009 struct ivopts_data *idata = wdata->idata;
3010 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3012 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3014 set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
3015 record_invariant (idata, op, false);
3018 if (!info->inv_id || info->has_nonlin_use)
3019 return NULL_TREE;
3021 if (!*wdata->inv_vars)
3022 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3023 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3025 return NULL_TREE;
3028 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we should
3029 store them. */
3031 static inline void
3032 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3034 struct walk_tree_data wdata;
3036 if (!inv_vars)
3037 return;
3039 wdata.idata = data;
3040 wdata.inv_vars = inv_vars;
3041 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3044 /* Get entry from invariant expr hash table for INV_EXPR. New entry
3045 will be recorded if it doesn't exist yet. Given below two exprs:
3046 inv_expr + cst1, inv_expr + cst2
3047 It's hard to decide whether the constant part should be stripped
3048 or not. We choose not to strip it, based on the facts below:
3049 1) We need to count the ADD cost for the constant part if it's stripped,
3050 which isn't always trivial where this function is called.
3051 2) Stripping the constant away may conflict with the subsequent loop
3052 invariant hoisting pass.
3053 3) Not stripping the constant away results in more invariant exprs,
3054 which usually leads to decisions preferring lower register pressure. */
3056 static iv_inv_expr_ent *
3057 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3059 STRIP_NOPS (inv_expr);
3061 if (poly_int_tree_p (inv_expr)
3062 || TREE_CODE (inv_expr) == SSA_NAME)
3063 return NULL;
3065 /* Don't strip constant part away as we used to. */
3067 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3068 struct iv_inv_expr_ent ent;
3069 ent.expr = inv_expr;
3070 ent.hash = iterative_hash_expr (inv_expr, 0);
3071 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3073 if (!*slot)
3075 *slot = XNEW (struct iv_inv_expr_ent);
3076 (*slot)->expr = inv_expr;
3077 (*slot)->hash = ent.hash;
3078 (*slot)->id = ++data->max_inv_expr_id;
3081 return *slot;
3084 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3085 position to POS. If USE is not NULL, the candidate is set as related to
3086 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3087 replacement of the final value of the iv by a direct computation. */
3089 static struct iv_cand *
3090 add_candidate_1 (struct ivopts_data *data,
3091 tree base, tree step, bool important, enum iv_position pos,
3092 struct iv_use *use, gimple *incremented_at,
3093 struct iv *orig_iv = NULL)
3095 unsigned i;
3096 struct iv_cand *cand = NULL;
3097 tree type, orig_type;
3099 gcc_assert (base && step);
3101 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3102 live, but the ivopts code may replace a real pointer with one
3103 pointing before or after the memory block that is then adjusted
3104 into the memory block during the loop. FIXME: It would likely be
3105 better to actually force the pointer live and still use ivopts;
3106 for example, it would be enough to write the pointer into memory
3107 and keep it there until after the loop. */
3108 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3109 return NULL;
3111 /* For non-original variables, make sure their values are computed in a type
3112 that does not invoke undefined behavior on overflows (since in general,
3113 we cannot prove that these induction variables are non-wrapping). */
3114 if (pos != IP_ORIGINAL)
3116 orig_type = TREE_TYPE (base);
3117 type = generic_type_for (orig_type);
3118 if (type != orig_type)
3120 base = fold_convert (type, base);
3121 step = fold_convert (type, step);
3125 for (i = 0; i < data->vcands.length (); i++)
3127 cand = data->vcands[i];
3129 if (cand->pos != pos)
3130 continue;
3132 if (cand->incremented_at != incremented_at
3133 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3134 && cand->ainc_use != use))
3135 continue;
3137 if (operand_equal_p (base, cand->iv->base, 0)
3138 && operand_equal_p (step, cand->iv->step, 0)
3139 && (TYPE_PRECISION (TREE_TYPE (base))
3140 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3141 break;
3144 if (i == data->vcands.length ())
3146 cand = XCNEW (struct iv_cand);
3147 cand->id = i;
3148 cand->iv = alloc_iv (data, base, step);
3149 cand->pos = pos;
3150 if (pos != IP_ORIGINAL)
3152 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3153 cand->var_after = cand->var_before;
3155 cand->important = important;
3156 cand->incremented_at = incremented_at;
3157 data->vcands.safe_push (cand);
3159 if (!poly_int_tree_p (step))
3161 find_inv_vars (data, &step, &cand->inv_vars);
3163 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3164 /* Share bitmap between inv_vars and inv_exprs for cand. */
3165 if (inv_expr != NULL)
3167 cand->inv_exprs = cand->inv_vars;
3168 cand->inv_vars = NULL;
3169 if (cand->inv_exprs)
3170 bitmap_clear (cand->inv_exprs);
3171 else
3172 cand->inv_exprs = BITMAP_ALLOC (NULL);
3174 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3178 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3179 cand->ainc_use = use;
3180 else
3181 cand->ainc_use = NULL;
3183 cand->orig_iv = orig_iv;
3184 if (dump_file && (dump_flags & TDF_DETAILS))
3185 dump_cand (dump_file, cand);
3188 cand->important |= important;
3190 /* Relate candidate to the group for which it is added. */
3191 if (use)
3192 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3194 return cand;
3197 /* Returns true if incrementing the induction variable at the end of the LOOP
3198 is allowed.
3200 The purpose is to avoid splitting the latch edge with a biv increment, thus
3201 creating a jump, possibly confusing other optimization passes and leaving
3202 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3203 available (so we do not have a better alternative), or if the latch edge
3204 is already nonempty. */
3206 static bool
3207 allow_ip_end_pos_p (struct loop *loop)
3209 if (!ip_normal_pos (loop))
3210 return true;
3212 if (!empty_block_p (ip_end_pos (loop)))
3213 return true;
3215 return false;
3218 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3219 Important field is set to IMPORTANT. */
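/* For instance, on a target with post-increment addressing, a 4-byte memory
   access whose iv has STEP = 4 gets an IP_AFTER_USE candidate, so that the
   increment can later be folded into the access itself.  */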
3221 static void
3222 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3223 bool important, struct iv_use *use)
3225 basic_block use_bb = gimple_bb (use->stmt);
3226 machine_mode mem_mode;
3227 unsigned HOST_WIDE_INT cstepi;
3229 /* If we insert the increment in any position other than the standard
3230 ones, we must ensure that it is incremented once per iteration.
3231 It must not be in an inner nested loop, or one side of an if
3232 statement. */
3233 if (use_bb->loop_father != data->current_loop
3234 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3235 || stmt_can_throw_internal (cfun, use->stmt)
3236 || !cst_and_fits_in_hwi (step))
3237 return;
3239 cstepi = int_cst_value (step);
3241 mem_mode = TYPE_MODE (use->mem_type);
3242 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3243 || USE_STORE_PRE_INCREMENT (mem_mode))
3244 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3245 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3246 || USE_STORE_PRE_DECREMENT (mem_mode))
3247 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3249 enum tree_code code = MINUS_EXPR;
3250 tree new_base;
3251 tree new_step = step;
3253 if (POINTER_TYPE_P (TREE_TYPE (base)))
3255 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3256 code = POINTER_PLUS_EXPR;
3258 else
3259 new_step = fold_convert (TREE_TYPE (base), new_step);
3260 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3261 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3262 use->stmt);
3264 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3265 || USE_STORE_POST_INCREMENT (mem_mode))
3266 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3267 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3268 || USE_STORE_POST_DECREMENT (mem_mode))
3269 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3271 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3272 use->stmt);
3276 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3277 position to POS. If USE is not NULL, the candidate is set as related to
3278 it. The candidate computation is scheduled before the exit condition and at
3279 the end of the loop. */
3281 static void
3282 add_candidate (struct ivopts_data *data,
3283 tree base, tree step, bool important, struct iv_use *use,
3284 struct iv *orig_iv = NULL)
3286 if (ip_normal_pos (data->current_loop))
3287 add_candidate_1 (data, base, step, important,
3288 IP_NORMAL, use, NULL, orig_iv);
3289 if (ip_end_pos (data->current_loop)
3290 && allow_ip_end_pos_p (data->current_loop))
3291 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3294 /* Adds standard iv candidates. */
3296 static void
3297 add_standard_iv_candidates (struct ivopts_data *data)
3299 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3301 /* The same for a double-integer type if it is still fast enough. */
3302 if (TYPE_PRECISION
3303 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3304 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3305 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3306 build_int_cst (long_integer_type_node, 1), true, NULL);
3308 /* The same for a double-integer type if it is still fast enough. */
3309 if (TYPE_PRECISION
3310 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3311 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3312 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3313 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3317 /* Adds candidates based on the old induction variable IV. */
3319 static void
3320 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3322 gimple *phi;
3323 tree def;
3324 struct iv_cand *cand;
3326 /* Check if this biv is used in address type use. */
3327 if (iv->no_overflow && iv->have_address_use
3328 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3329 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3331 tree base = fold_convert (sizetype, iv->base);
3332 tree step = fold_convert (sizetype, iv->step);
3334 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3335 add_candidate (data, base, step, true, NULL, iv);
3336 /* Add iv cand of the original type only if it has nonlinear use. */
3337 if (iv->nonlin_use)
3338 add_candidate (data, iv->base, iv->step, true, NULL);
3340 else
3341 add_candidate (data, iv->base, iv->step, true, NULL);
3343 /* The same, but with initial value zero. */
3344 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3345 add_candidate (data, size_int (0), iv->step, true, NULL);
3346 else
3347 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3348 iv->step, true, NULL);
3350 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3351 if (gimple_code (phi) == GIMPLE_PHI)
3353 /* Additionally record the possibility of leaving the original iv
3354 untouched. */
3355 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3356 /* Don't add candidate if it's from another PHI node because
3357 it's an affine iv appearing in the form of PEELED_CHREC. */
3358 phi = SSA_NAME_DEF_STMT (def);
3359 if (gimple_code (phi) != GIMPLE_PHI)
3361 cand = add_candidate_1 (data,
3362 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3363 SSA_NAME_DEF_STMT (def));
3364 if (cand)
3366 cand->var_before = iv->ssa_name;
3367 cand->var_after = def;
3370 else
3371 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3375 /* Adds candidates based on the old induction variables. */
3377 static void
3378 add_iv_candidate_for_bivs (struct ivopts_data *data)
3380 unsigned i;
3381 struct iv *iv;
3382 bitmap_iterator bi;
3384 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3386 iv = ver_info (data, i)->iv;
3387 if (iv && iv->biv_p && !integer_zerop (iv->step))
3388 add_iv_candidate_for_biv (data, iv);
3392 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3394 static void
3395 record_common_cand (struct ivopts_data *data, tree base,
3396 tree step, struct iv_use *use)
3398 struct iv_common_cand ent;
3399 struct iv_common_cand **slot;
3401 ent.base = base;
3402 ent.step = step;
3403 ent.hash = iterative_hash_expr (base, 0);
3404 ent.hash = iterative_hash_expr (step, ent.hash);
3406 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3407 if (*slot == NULL)
3409 *slot = new iv_common_cand ();
3410 (*slot)->base = base;
3411 (*slot)->step = step;
3412 (*slot)->uses.create (8);
3413 (*slot)->hash = ent.hash;
3414 data->iv_common_cands.safe_push ((*slot));
3417 gcc_assert (use != NULL);
3418 (*slot)->uses.safe_push (use);
3419 return;
3422 /* Comparison function used to sort common candidates. */
3424 static int
3425 common_cand_cmp (const void *p1, const void *p2)
3427 unsigned n1, n2;
3428 const struct iv_common_cand *const *const ccand1
3429 = (const struct iv_common_cand *const *)p1;
3430 const struct iv_common_cand *const *const ccand2
3431 = (const struct iv_common_cand *const *)p2;
3433 n1 = (*ccand1)->uses.length ();
3434 n2 = (*ccand2)->uses.length ();
3435 return n2 - n1;
3438 /* Adds IV candidates based on the common candidates recorded. */
3440 static void
3441 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3443 unsigned i, j;
3444 struct iv_cand *cand_1, *cand_2;
3446 data->iv_common_cands.qsort (common_cand_cmp);
3447 for (i = 0; i < data->iv_common_cands.length (); i++)
3449 struct iv_common_cand *ptr = data->iv_common_cands[i];
3451 /* Only add IV candidate if it's derived from multiple uses. */
3452 if (ptr->uses.length () <= 1)
3453 break;
3455 cand_1 = NULL;
3456 cand_2 = NULL;
3457 if (ip_normal_pos (data->current_loop))
3458 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3459 false, IP_NORMAL, NULL, NULL);
3461 if (ip_end_pos (data->current_loop)
3462 && allow_ip_end_pos_p (data->current_loop))
3463 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3464 false, IP_END, NULL, NULL);
3466 /* Bind deriving uses and the new candidates. */
3467 for (j = 0; j < ptr->uses.length (); j++)
3469 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3470 if (cand_1)
3471 bitmap_set_bit (group->related_cands, cand_1->id);
3472 if (cand_2)
3473 bitmap_set_bit (group->related_cands, cand_2->id);
3477 /* Release data since it is useless from this point. */
3478 data->iv_common_cand_tab->empty ();
3479 data->iv_common_cands.truncate (0);
3482 /* Adds candidates based on the value of USE's iv. */
3484 static void
3485 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3487 poly_uint64 offset;
3488 tree base;
3489 tree basetype;
3490 struct iv *iv = use->iv;
3492 add_candidate (data, iv->base, iv->step, false, use);
3494 /* Record common candidate for use in case it can be shared by others. */
3495 record_common_cand (data, iv->base, iv->step, use);
3497 /* Record common candidate with initial value zero. */
3498 basetype = TREE_TYPE (iv->base);
3499 if (POINTER_TYPE_P (basetype))
3500 basetype = sizetype;
3501 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3503 /* Record common candidate with constant offset stripped in base.
3504 Like the use itself, we also add candidate directly for it. */
3505 base = strip_offset (iv->base, &offset);
3506 if (maybe_ne (offset, 0U) || base != iv->base)
3508 record_common_cand (data, base, iv->step, use);
3509 add_candidate (data, base, iv->step, false, use);
3512 /* Record common candidate with base_object removed in base. */
3513 base = iv->base;
3514 STRIP_NOPS (base);
3515 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3517 tree step = iv->step;
3519 STRIP_NOPS (step);
3520 base = TREE_OPERAND (base, 1);
3521 step = fold_convert (sizetype, step);
3522 record_common_cand (data, base, step, use);
3523 /* Also record common candidate with offset stripped. */
3524 base = strip_offset (base, &offset);
3525 if (maybe_ne (offset, 0U))
3526 record_common_cand (data, base, step, use);
3529 /* Finally, add auto-increment candidates. Make such variables
3530 important, since other iv uses with the same base object may be based
3531 on them. */
3532 if (use != NULL && address_p (use->type))
3533 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3536 /* Adds candidates based on the uses. */
3538 static void
3539 add_iv_candidate_for_groups (struct ivopts_data *data)
3541 unsigned i;
3543 /* Only add candidates for the first use in each group. */
3544 for (i = 0; i < data->vgroups.length (); i++)
3546 struct iv_group *group = data->vgroups[i];
3548 gcc_assert (group->vuses[0] != NULL);
3549 add_iv_candidate_for_use (data, group->vuses[0]);
3551 add_iv_candidate_derived_from_uses (data);
3554 /* Record important candidates and add them to related_cands bitmaps. */
3556 static void
3557 record_important_candidates (struct ivopts_data *data)
3559 unsigned i;
3560 struct iv_group *group;
3562 for (i = 0; i < data->vcands.length (); i++)
3564 struct iv_cand *cand = data->vcands[i];
3566 if (cand->important)
3567 bitmap_set_bit (data->important_candidates, i);
3570 data->consider_all_candidates = (data->vcands.length ()
3571 <= CONSIDER_ALL_CANDIDATES_BOUND);
3573 /* Add important candidates to groups' related_cands bitmaps. */
3574 for (i = 0; i < data->vgroups.length (); i++)
3576 group = data->vgroups[i];
3577 bitmap_ior_into (group->related_cands, data->important_candidates);
3581 /* Allocates the data structure mapping the (group, candidate) pairs to costs.
3582 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3583 we allocate a simple list for every group. */
3585 static void
3586 alloc_use_cost_map (struct ivopts_data *data)
3588 unsigned i, size, s;
3590 for (i = 0; i < data->vgroups.length (); i++)
3592 struct iv_group *group = data->vgroups[i];
3594 if (data->consider_all_candidates)
3595 size = data->vcands.length ();
3596 else
3598 s = bitmap_count_bits (group->related_cands);
3600 /* Round up to a power of two, so that taking the modulus is fast. */
3601 size = s ? (1 << ceil_log2 (s)) : 1;
3604 group->n_map_members = size;
3605 group->cost_map = XCNEWVEC (struct cost_pair, size);
3609 /* Sets the cost of the (GROUP, CAND) pair to COST and records that it depends
3610 on invariants INV_VARS, that the value used in expressing it is
3611 VALUE, and that in the case of iv elimination the comparison operator is COMP. */
3613 static void
3614 set_group_iv_cost (struct ivopts_data *data,
3615 struct iv_group *group, struct iv_cand *cand,
3616 comp_cost cost, bitmap inv_vars, tree value,
3617 enum tree_code comp, bitmap inv_exprs)
3619 unsigned i, s;
3621 if (cost.infinite_cost_p ())
3623 BITMAP_FREE (inv_vars);
3624 BITMAP_FREE (inv_exprs);
3625 return;
3628 if (data->consider_all_candidates)
3630 group->cost_map[cand->id].cand = cand;
3631 group->cost_map[cand->id].cost = cost;
3632 group->cost_map[cand->id].inv_vars = inv_vars;
3633 group->cost_map[cand->id].inv_exprs = inv_exprs;
3634 group->cost_map[cand->id].value = value;
3635 group->cost_map[cand->id].comp = comp;
3636 return;
3639 /* n_map_members is a power of two, so this computes modulo. */
3640 s = cand->id & (group->n_map_members - 1);
3641 for (i = s; i < group->n_map_members; i++)
3642 if (!group->cost_map[i].cand)
3643 goto found;
3644 for (i = 0; i < s; i++)
3645 if (!group->cost_map[i].cand)
3646 goto found;
3648 gcc_unreachable ();
3650 found:
3651 group->cost_map[i].cand = cand;
3652 group->cost_map[i].cost = cost;
3653 group->cost_map[i].inv_vars = inv_vars;
3654 group->cost_map[i].inv_exprs = inv_exprs;
3655 group->cost_map[i].value = value;
3656 group->cost_map[i].comp = comp;
3659 /* Gets cost of (GROUP, CAND) pair. */
3661 static struct cost_pair *
3662 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3663 struct iv_cand *cand)
3665 unsigned i, s;
3666 struct cost_pair *ret;
3668 if (!cand)
3669 return NULL;
3671 if (data->consider_all_candidates)
3673 ret = group->cost_map + cand->id;
3674 if (!ret->cand)
3675 return NULL;
3677 return ret;
3680 /* n_map_members is a power of two, so this computes modulo. */
3681 s = cand->id & (group->n_map_members - 1);
3682 for (i = s; i < group->n_map_members; i++)
3683 if (group->cost_map[i].cand == cand)
3684 return group->cost_map + i;
3685 else if (group->cost_map[i].cand == NULL)
3686 return NULL;
3687 for (i = 0; i < s; i++)
3688 if (group->cost_map[i].cand == cand)
3689 return group->cost_map + i;
3690 else if (group->cost_map[i].cand == NULL)
3691 return NULL;
3693 return NULL;
3696 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3697 static rtx
3698 produce_memory_decl_rtl (tree obj, int *regno)
3700 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3701 machine_mode address_mode = targetm.addr_space.address_mode (as);
3702 rtx x;
3704 gcc_assert (obj);
3705 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3707 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3708 x = gen_rtx_SYMBOL_REF (address_mode, name);
3709 SET_SYMBOL_REF_DECL (x, obj);
3710 x = gen_rtx_MEM (DECL_MODE (obj), x);
3711 set_mem_addr_space (x, as);
3712 targetm.encode_section_info (obj, x, true);
3714 else
3716 x = gen_raw_REG (address_mode, (*regno)++);
3717 x = gen_rtx_MEM (DECL_MODE (obj), x);
3718 set_mem_addr_space (x, as);
3721 return x;
3724 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3725 walk_tree. DATA contains the actual fake register number. */
3727 static tree
3728 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3730 tree obj = NULL_TREE;
3731 rtx x = NULL_RTX;
3732 int *regno = (int *) data;
3734 switch (TREE_CODE (*expr_p))
3736 case ADDR_EXPR:
3737 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3738 handled_component_p (*expr_p);
3739 expr_p = &TREE_OPERAND (*expr_p, 0))
3740 continue;
3741 obj = *expr_p;
3742 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3743 x = produce_memory_decl_rtl (obj, regno);
3744 break;
3746 case SSA_NAME:
3747 *ws = 0;
3748 obj = SSA_NAME_VAR (*expr_p);
3749 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3750 if (!obj)
3751 return NULL_TREE;
3752 if (!DECL_RTL_SET_P (obj))
3753 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3754 break;
3756 case VAR_DECL:
3757 case PARM_DECL:
3758 case RESULT_DECL:
3759 *ws = 0;
3760 obj = *expr_p;
3762 if (DECL_RTL_SET_P (obj))
3763 break;
3765 if (DECL_MODE (obj) == BLKmode)
3766 x = produce_memory_decl_rtl (obj, regno);
3767 else
3768 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3770 break;
3772 default:
3773 break;
3776 if (x)
3778 decl_rtl_to_reset.safe_push (obj);
3779 SET_DECL_RTL (obj, x);
3782 return NULL_TREE;
3785 /* Determines cost of the computation of EXPR. */
3787 static unsigned
3788 computation_cost (tree expr, bool speed)
3790 rtx_insn *seq;
3791 rtx rslt;
3792 tree type = TREE_TYPE (expr);
3793 unsigned cost;
3794 /* Avoid using hard regs in ways which may be unsupported. */
3795 int regno = LAST_VIRTUAL_REGISTER + 1;
3796 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3797 enum node_frequency real_frequency = node->frequency;
3799 node->frequency = NODE_FREQUENCY_NORMAL;
3800 crtl->maybe_hot_insn_p = speed;
3801 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3802 start_sequence ();
3803 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3804 seq = get_insns ();
3805 end_sequence ();
3806 default_rtl_profile ();
3807 node->frequency = real_frequency;
3809 cost = seq_cost (seq, speed);
3810 if (MEM_P (rslt))
3811 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3812 TYPE_ADDR_SPACE (type), speed);
3813 else if (!REG_P (rslt))
3814 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3816 return cost;
3819 /* Returns variable containing the value of candidate CAND at statement AT. */
3821 static tree
3822 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3824 if (stmt_after_increment (loop, cand, stmt))
3825 return cand->var_after;
3826 else
3827 return cand->var_before;
3830 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3831 same precision that is at least as wide as the precision of TYPE, stores
3832 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3833 type of A and B. */
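/* For example, if *A is (unsigned int) a_1 and *B is (unsigned int) b_2,
   where a_1 and b_2 both have a 64-bit type while unsigned int is 32 bits,
   *A and *B are replaced by a_1 and b_2 and the 64-bit type is returned.  */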
3835 static tree
3836 determine_common_wider_type (tree *a, tree *b)
3838 tree wider_type = NULL;
3839 tree suba, subb;
3840 tree atype = TREE_TYPE (*a);
3842 if (CONVERT_EXPR_P (*a))
3844 suba = TREE_OPERAND (*a, 0);
3845 wider_type = TREE_TYPE (suba);
3846 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3847 return atype;
3849 else
3850 return atype;
3852 if (CONVERT_EXPR_P (*b))
3854 subb = TREE_OPERAND (*b, 0);
3855 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3856 return atype;
3858 else
3859 return atype;
3861 *a = suba;
3862 *b = subb;
3863 return wider_type;
3866 /* Determines the expression by which USE is expressed from the induction variable
3867 CAND at statement AT in LOOP. The expression is stored in two parts in a
3868 decomposed form. The invariant part is stored in AFF_INV; while variant
3869 part in AFF_VAR. Store ratio of CAND.step over USE.step in PRAT if it's
3870 non-null. Returns false if USE cannot be expressed using CAND. */
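/* A small worked example (values chosen for illustration): if USE's iv is
   {base 4, step 4} and CAND's iv is {base 0, step 2}, then the ratio is 2,
   AFF_INV becomes 4 - 2 * 0 = 4 and AFF_VAR becomes 2 * (the candidate's
   variable), i.e. the use value is rewritten as 4 + 2 * cand.  */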
3872 static bool
3873 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3874 struct iv_cand *cand, struct aff_tree *aff_inv,
3875 struct aff_tree *aff_var, widest_int *prat = NULL)
3877 tree ubase = use->iv->base, ustep = use->iv->step;
3878 tree cbase = cand->iv->base, cstep = cand->iv->step;
3879 tree common_type, uutype, var, cstep_common;
3880 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3881 aff_tree aff_cbase;
3882 widest_int rat;
3884 /* We must have a precision to express the values of use. */
3885 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3886 return false;
3888 var = var_at_stmt (loop, cand, at);
3889 uutype = unsigned_type_for (utype);
3891 /* If the conversion is not noop, perform it. */
3892 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3894 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3895 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3897 tree inner_base, inner_step, inner_type;
3898 inner_base = TREE_OPERAND (cbase, 0);
3899 if (CONVERT_EXPR_P (cstep))
3900 inner_step = TREE_OPERAND (cstep, 0);
3901 else
3902 inner_step = cstep;
3904 inner_type = TREE_TYPE (inner_base);
3905 /* If the candidate is added from a biv whose type is smaller than
3906 ctype, we know that neither the candidate nor the biv will overflow.
3907 In this case, it's safe to skip the conversion in the candidate.
3908 For example, (unsigned short)((unsigned long)A) equals
3909 (unsigned short)A, if A has a type no larger than short. */
3910 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3912 cbase = inner_base;
3913 cstep = inner_step;
3916 cbase = fold_convert (uutype, cbase);
3917 cstep = fold_convert (uutype, cstep);
3918 var = fold_convert (uutype, var);
3921 /* Ratio is 1 when computing the value of biv cand by itself.
3922 We can't rely on constant_multiple_of in this case because the
3923 use is created after the original biv is selected. The call
3924 could fail because of inconsistent fold behavior. See PR68021
3925 for more information. */
3926 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3928 gcc_assert (is_gimple_assign (use->stmt));
3929 gcc_assert (use->iv->ssa_name == cand->var_after);
3930 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3931 rat = 1;
3933 else if (!constant_multiple_of (ustep, cstep, &rat))
3934 return false;
3936 if (prat)
3937 *prat = rat;
3939 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3940 type, we achieve better folding by computing their difference in this
3941 wider type, and casting the result to UUTYPE. We do not need to worry about
3942 overflows, as all the arithmetic will in the end be performed in UUTYPE
3943 anyway. */
3944 common_type = determine_common_wider_type (&ubase, &cbase);
3946 /* use = ubase - ratio * cbase + ratio * var. */
3947 tree_to_aff_combination (ubase, common_type, aff_inv);
3948 tree_to_aff_combination (cbase, common_type, &aff_cbase);
3949 tree_to_aff_combination (var, uutype, aff_var);
3951 /* We need to shift the value if we are after the increment. */
3952 if (stmt_after_increment (loop, cand, at))
3954 aff_tree cstep_aff;
3956 if (common_type != uutype)
3957 cstep_common = fold_convert (common_type, cstep);
3958 else
3959 cstep_common = cstep;
3961 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3962 aff_combination_add (&aff_cbase, &cstep_aff);
3965 aff_combination_scale (&aff_cbase, -rat);
3966 aff_combination_add (aff_inv, &aff_cbase);
3967 if (common_type != uutype)
3968 aff_combination_convert (aff_inv, uutype);
3970 aff_combination_scale (aff_var, rat);
3971 return true;
3974 /* Determines the expression by which USE is expressed from the induction variable
3975 CAND at statement AT in LOOP. The expression is stored in a decomposed
3976 form into AFF. Returns false if USE cannot be expressed using CAND. */
3978 static bool
3979 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3980 struct iv_cand *cand, struct aff_tree *aff)
3982 aff_tree aff_var;
3984 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3985 return false;
3987 aff_combination_add (aff, &aff_var);
3988 return true;
3991 /* Return the type of USE. */
3993 static tree
3994 get_use_type (struct iv_use *use)
3996 tree base_type = TREE_TYPE (use->iv->base);
3997 tree type;
3999 if (use->type == USE_REF_ADDRESS)
4001 /* The base_type may be a void pointer. Create a pointer type based on
4002 the mem_ref instead. */
4003 type = build_pointer_type (TREE_TYPE (*use->op_p));
4004 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4005 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4007 else
4008 type = base_type;
4010 return type;
4013 /* Determines the expression by which USE is expressed from the induction variable
4014 CAND at statement AT in LOOP. The computation is unshared. */
4016 static tree
4017 get_computation_at (struct loop *loop, gimple *at,
4018 struct iv_use *use, struct iv_cand *cand)
4020 aff_tree aff;
4021 tree type = get_use_type (use);
4023 if (!get_computation_aff (loop, at, use, cand, &aff))
4024 return NULL_TREE;
4025 unshare_aff_combination (&aff);
4026 return fold_convert (type, aff_combination_to_tree (&aff));
4029 /* Adjust the cost COST for being in loop setup rather than loop body.
4030 If we're optimizing for space, the loop setup overhead is constant;
4031 if we're optimizing for speed, amortize it over the per-iteration cost.
4032 If ROUND_UP_P is true, the result is rounded up rather than truncated toward
4033 zero when optimizing for speed. */
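/* For example, with COST = 10 and an average loop iteration count of 4,
   the speed-adjusted cost below is 10 / 4 = 2, or (10 + 3) / 4 = 3 when
   ROUND_UP_P is true; when optimizing for size the cost stays 10.  */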
4034 static int64_t
4035 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4036 bool round_up_p = false)
4038 if (cost == INFTY)
4039 return cost;
4040 else if (optimize_loop_for_speed_p (data->current_loop))
4042 int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4043 return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4045 else
4046 return cost;
4049 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4050 EXPR operand holding the shift. COST0 and COST1 are the costs for
4051 calculating the operands of EXPR. Returns true if successful, and returns
4052 the cost in COST. */
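/* For instance, for EXPR = a_1 + b_2 * 8 with MULT being b_2 * 8, the shift
   amount m is 3, and the cheaper of a shift-and-add instruction and a
   separate shift followed by an add is used as the cost.  */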
4054 static bool
4055 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4056 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4058 comp_cost res;
4059 tree op1 = TREE_OPERAND (expr, 1);
4060 tree cst = TREE_OPERAND (mult, 1);
4061 tree multop = TREE_OPERAND (mult, 0);
4062 int m = exact_log2 (int_cst_value (cst));
4063 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4064 int as_cost, sa_cost;
4065 bool mult_in_op1;
4067 if (!(m >= 0 && m < maxm))
4068 return false;
4070 STRIP_NOPS (op1);
4071 mult_in_op1 = operand_equal_p (op1, mult, 0);
4073 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4075 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4076 use that in preference to a shift insn followed by an add insn. */
4077 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4078 ? shiftadd_cost (speed, mode, m)
4079 : (mult_in_op1
4080 ? shiftsub1_cost (speed, mode, m)
4081 : shiftsub0_cost (speed, mode, m)));
4083 res = comp_cost (MIN (as_cost, sa_cost), 0);
4084 res += (mult_in_op1 ? cost0 : cost1);
4086 STRIP_NOPS (multop);
4087 if (!is_gimple_val (multop))
4088 res += force_expr_to_var_cost (multop, speed);
4090 *cost = res;
4091 return true;
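/* For instance, for an expression such as

     x = a + b * 8;

   m == 3, so the candidate sequences are "t = b << 3; x = a + t" (cost
   AS_COST == add_cost + shift_cost) and a single shift-and-add insn (cost
   SA_COST == shiftadd_cost); the cheaper of the two is chosen and the cost
   of the remaining operand A is added on top.  This is only a sketch; the
   actual numbers come from the target cost hooks.  */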
4094 /* Estimates cost of forcing expression EXPR into a variable. */
4096 static comp_cost
4097 force_expr_to_var_cost (tree expr, bool speed)
4099 static bool costs_initialized = false;
4100 static unsigned integer_cost [2];
4101 static unsigned symbol_cost [2];
4102 static unsigned address_cost [2];
4103 tree op0, op1;
4104 comp_cost cost0, cost1, cost;
4105 machine_mode mode;
4106 scalar_int_mode int_mode;
4108 if (!costs_initialized)
4110 tree type = build_pointer_type (integer_type_node);
4111 tree var, addr;
4112 rtx x;
4113 int i;
4115 var = create_tmp_var_raw (integer_type_node, "test_var");
4116 TREE_STATIC (var) = 1;
4117 x = produce_memory_decl_rtl (var, NULL);
4118 SET_DECL_RTL (var, x);
4120 addr = build1 (ADDR_EXPR, type, var);
4123 for (i = 0; i < 2; i++)
4125 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4126 2000), i);
4128 symbol_cost[i] = computation_cost (addr, i) + 1;
4130 address_cost[i]
4131 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4132 if (dump_file && (dump_flags & TDF_DETAILS))
4134 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4135 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4136 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4137 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4138 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4139 fprintf (dump_file, "\n");
4143 costs_initialized = true;
4146 STRIP_NOPS (expr);
4148 if (SSA_VAR_P (expr))
4149 return no_cost;
4151 if (is_gimple_min_invariant (expr))
4153 if (poly_int_tree_p (expr))
4154 return comp_cost (integer_cost [speed], 0);
4156 if (TREE_CODE (expr) == ADDR_EXPR)
4158 tree obj = TREE_OPERAND (expr, 0);
4160 if (VAR_P (obj)
4161 || TREE_CODE (obj) == PARM_DECL
4162 || TREE_CODE (obj) == RESULT_DECL)
4163 return comp_cost (symbol_cost [speed], 0);
4166 return comp_cost (address_cost [speed], 0);
4169 switch (TREE_CODE (expr))
4171 case POINTER_PLUS_EXPR:
4172 case PLUS_EXPR:
4173 case MINUS_EXPR:
4174 case MULT_EXPR:
4175 case TRUNC_DIV_EXPR:
4176 case BIT_AND_EXPR:
4177 case BIT_IOR_EXPR:
4178 case LSHIFT_EXPR:
4179 case RSHIFT_EXPR:
4180 op0 = TREE_OPERAND (expr, 0);
4181 op1 = TREE_OPERAND (expr, 1);
4182 STRIP_NOPS (op0);
4183 STRIP_NOPS (op1);
4184 break;
4186 CASE_CONVERT:
4187 case NEGATE_EXPR:
4188 case BIT_NOT_EXPR:
4189 op0 = TREE_OPERAND (expr, 0);
4190 STRIP_NOPS (op0);
4191 op1 = NULL_TREE;
4192 break;
4194 default:
4195 /* Just an arbitrary value, FIXME. */
4196 return comp_cost (target_spill_cost[speed], 0);
4199 if (op0 == NULL_TREE
4200 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4201 cost0 = no_cost;
4202 else
4203 cost0 = force_expr_to_var_cost (op0, speed);
4205 if (op1 == NULL_TREE
4206 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4207 cost1 = no_cost;
4208 else
4209 cost1 = force_expr_to_var_cost (op1, speed);
4211 mode = TYPE_MODE (TREE_TYPE (expr));
4212 switch (TREE_CODE (expr))
4214 case POINTER_PLUS_EXPR:
4215 case PLUS_EXPR:
4216 case MINUS_EXPR:
4217 case NEGATE_EXPR:
4218 cost = comp_cost (add_cost (speed, mode), 0);
4219 if (TREE_CODE (expr) != NEGATE_EXPR)
4221 tree mult = NULL_TREE;
4222 comp_cost sa_cost;
4223 if (TREE_CODE (op1) == MULT_EXPR)
4224 mult = op1;
4225 else if (TREE_CODE (op0) == MULT_EXPR)
4226 mult = op0;
4228 if (mult != NULL_TREE
4229 && is_a <scalar_int_mode> (mode, &int_mode)
4230 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4231 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4232 speed, &sa_cost))
4233 return sa_cost;
4235 break;
4237 CASE_CONVERT:
4239 tree inner_mode, outer_mode;
4240 outer_mode = TREE_TYPE (expr);
4241 inner_mode = TREE_TYPE (op0);
4242 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4243 TYPE_MODE (inner_mode), speed), 0);
4245 break;
4247 case MULT_EXPR:
4248 if (cst_and_fits_in_hwi (op0))
4249 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4250 mode, speed), 0);
4251 else if (cst_and_fits_in_hwi (op1))
4252 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4253 mode, speed), 0);
4254 else
4255 return comp_cost (target_spill_cost [speed], 0);
4256 break;
4258 case TRUNC_DIV_EXPR:
4259 /* Division by power of two is usually cheap, so we allow it. Forbid
4260 anything else. */
4261 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4262 cost = comp_cost (add_cost (speed, mode), 0);
4263 else
4264 cost = comp_cost (target_spill_cost[speed], 0);
4265 break;
4267 case BIT_AND_EXPR:
4268 case BIT_IOR_EXPR:
4269 case BIT_NOT_EXPR:
4270 case LSHIFT_EXPR:
4271 case RSHIFT_EXPR:
4272 cost = comp_cost (add_cost (speed, mode), 0);
4273 break;
4275 default:
4276 gcc_unreachable ();
4279 cost += cost0;
4280 cost += cost1;
4281 return cost;
4284 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4285 invariants the computation depends on. */
4287 static comp_cost
4288 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4290 if (!expr)
4291 return no_cost;
4293 find_inv_vars (data, &expr, inv_vars);
4294 return force_expr_to_var_cost (expr, data->speed);
4297 /* Returns cost of auto-modifying address expression in shape base + offset.
4298 AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4299 address expression. The address expression has ADDR_MODE in addr space
4300 AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4301 speed or size. */
4303 enum ainc_type
4305 AINC_PRE_INC, /* Pre increment. */
4306 AINC_PRE_DEC, /* Pre decrement. */
4307 AINC_POST_INC, /* Post increment. */
4308 AINC_POST_DEC, /* Post decrement. */
4309 AINC_NONE /* Also the number of auto increment types. */
4312 struct ainc_cost_data
4314 int64_t costs[AINC_NONE];
4317 static comp_cost
4318 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4319 machine_mode addr_mode, machine_mode mem_mode,
4320 addr_space_t as, bool speed)
4322 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4323 && !USE_STORE_PRE_DECREMENT (mem_mode)
4324 && !USE_LOAD_POST_DECREMENT (mem_mode)
4325 && !USE_STORE_POST_DECREMENT (mem_mode)
4326 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4327 && !USE_STORE_PRE_INCREMENT (mem_mode)
4328 && !USE_LOAD_POST_INCREMENT (mem_mode)
4329 && !USE_STORE_POST_INCREMENT (mem_mode))
4330 return infinite_cost;
4332 static vec<ainc_cost_data *> ainc_cost_data_list;
4333 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4334 if (idx >= ainc_cost_data_list.length ())
4336 unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4338 gcc_assert (nsize > idx);
4339 ainc_cost_data_list.safe_grow_cleared (nsize);
4342 ainc_cost_data *data = ainc_cost_data_list[idx];
4343 if (data == NULL)
4345 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4347 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4348 data->costs[AINC_PRE_DEC] = INFTY;
4349 data->costs[AINC_POST_DEC] = INFTY;
4350 data->costs[AINC_PRE_INC] = INFTY;
4351 data->costs[AINC_POST_INC] = INFTY;
4352 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4353 || USE_STORE_PRE_DECREMENT (mem_mode))
4355 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4357 if (memory_address_addr_space_p (mem_mode, addr, as))
4358 data->costs[AINC_PRE_DEC]
4359 = address_cost (addr, mem_mode, as, speed);
4361 if (USE_LOAD_POST_DECREMENT (mem_mode)
4362 || USE_STORE_POST_DECREMENT (mem_mode))
4364 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4366 if (memory_address_addr_space_p (mem_mode, addr, as))
4367 data->costs[AINC_POST_DEC]
4368 = address_cost (addr, mem_mode, as, speed);
4370 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4371 || USE_STORE_PRE_INCREMENT (mem_mode))
4373 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4375 if (memory_address_addr_space_p (mem_mode, addr, as))
4376 data->costs[AINC_PRE_INC]
4377 = address_cost (addr, mem_mode, as, speed);
4379 if (USE_LOAD_POST_INCREMENT (mem_mode)
4380 || USE_STORE_POST_INCREMENT (mem_mode))
4382 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4384 if (memory_address_addr_space_p (mem_mode, addr, as))
4385 data->costs[AINC_POST_INC]
4386 = address_cost (addr, mem_mode, as, speed);
4388 ainc_cost_data_list[idx] = data;
4391 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4392 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4393 return comp_cost (data->costs[AINC_POST_INC], 0);
4394 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4395 return comp_cost (data->costs[AINC_POST_DEC], 0);
4396 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4397 return comp_cost (data->costs[AINC_PRE_INC], 0);
4398 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4399 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4401 return infinite_cost;
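/* As an illustration (assuming the target supports the addressing mode at
   all): for a 4-byte access with AINC_STEP == 4, an offset of 0 matches the
   post-increment form "*p++" and an offset of 4 matches the pre-increment
   form "*++p"; negative steps map to the decrement forms.  Any other
   step/offset combination yields infinite_cost here.  */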
4404 /* Return cost of computing USE's address expression by using CAND.
4405 AFF_INV and AFF_VAR represent invariant and variant parts of the
4406 address expression, respectively. If AFF_INV is simple, store
4407 the loop invariant variables on which it depends in INV_VARS;
4408 if AFF_INV is complicated, handle it as a new invariant expression
4409 and record it in INV_EXPR. RATIO is the ratio between the steps
4410 of USE and CAND. If CAN_AUTOINC is non-NULL, store a boolean
4411 value in it indicating whether this is an auto-increment address. */
4413 static comp_cost
4414 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4415 struct iv_cand *cand, aff_tree *aff_inv,
4416 aff_tree *aff_var, HOST_WIDE_INT ratio,
4417 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4418 bool *can_autoinc, bool speed)
4420 rtx addr;
4421 bool simple_inv = true;
4422 tree comp_inv = NULL_TREE, type = aff_var->type;
4423 comp_cost var_cost = no_cost, cost = no_cost;
4424 struct mem_address parts = {NULL_TREE, integer_one_node,
4425 NULL_TREE, NULL_TREE, NULL_TREE};
4426 machine_mode addr_mode = TYPE_MODE (type);
4427 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4428 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4429 /* Only true if ratio != 1. */
4430 bool ok_with_ratio_p = false;
4431 bool ok_without_ratio_p = false;
4433 if (!aff_combination_const_p (aff_inv))
4435 parts.index = integer_one_node;
4436 /* Addressing mode "base + index". */
4437 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4438 if (ratio != 1)
4440 parts.step = wide_int_to_tree (type, ratio);
4441 /* Addressing mode "base + index << scale". */
4442 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4443 if (!ok_with_ratio_p)
4444 parts.step = NULL_TREE;
4446 if (ok_with_ratio_p || ok_without_ratio_p)
4448 if (maybe_ne (aff_inv->offset, 0))
4450 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4451 /* Addressing mode "base + index [<< scale] + offset". */
4452 if (!valid_mem_ref_p (mem_mode, as, &parts))
4453 parts.offset = NULL_TREE;
4454 else
4455 aff_inv->offset = 0;
4458 move_fixed_address_to_symbol (&parts, aff_inv);
4459 /* Base is fixed address and is moved to symbol part. */
4460 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4461 parts.base = NULL_TREE;
4463 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4464 if (parts.symbol != NULL_TREE
4465 && !valid_mem_ref_p (mem_mode, as, &parts))
4467 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4468 parts.symbol = NULL_TREE;
4469 /* Reset SIMPLE_INV since symbol address needs to be computed
4470 outside of address expression in this case. */
4471 simple_inv = false;
4472 /* Symbol part is moved back to base part; it can't be NULL. */
4473 parts.base = integer_one_node;
4476 else
4477 parts.index = NULL_TREE;
4479 else
4481 poly_int64 ainc_step;
4482 if (can_autoinc
4483 && ratio == 1
4484 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4486 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4488 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4489 ainc_offset += ainc_step;
4490 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4491 addr_mode, mem_mode, as, speed);
4492 if (!cost.infinite_cost_p ())
4494 *can_autoinc = true;
4495 return cost;
4497 cost = no_cost;
4499 if (!aff_combination_zero_p (aff_inv))
4501 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4502 /* Addressing mode "base + offset". */
4503 if (!valid_mem_ref_p (mem_mode, as, &parts))
4504 parts.offset = NULL_TREE;
4505 else
4506 aff_inv->offset = 0;
4510 if (simple_inv)
4511 simple_inv = (aff_inv == NULL
4512 || aff_combination_const_p (aff_inv)
4513 || aff_combination_singleton_var_p (aff_inv));
4514 if (!aff_combination_zero_p (aff_inv))
4515 comp_inv = aff_combination_to_tree (aff_inv);
4516 if (comp_inv != NULL_TREE)
4517 cost = force_var_cost (data, comp_inv, inv_vars);
4518 if (ratio != 1 && parts.step == NULL_TREE)
4519 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4520 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4521 var_cost += add_cost (speed, addr_mode);
4523 if (comp_inv && inv_expr && !simple_inv)
4525 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4526 /* Clear depends on. */
4527 if (*inv_expr != NULL && inv_vars && *inv_vars)
4528 bitmap_clear (*inv_vars);
4530 /* The cost of a small invariant expression, once adjusted against the
4531 loop iteration count, is usually zero, which makes it hard to
4532 distinguish from a candidate based on loop invariant variables.
4533 Moreover, the generated invariant expression may not be hoisted out
4534 of the loop by a following pass. We penalize the cost by rounding
4535 up in order to neutralize such effects. */
4536 cost.cost = adjust_setup_cost (data, cost.cost, true);
4537 cost.scratch = cost.cost;
4540 cost += var_cost;
4541 addr = addr_for_mem_ref (&parts, as, false);
4542 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4543 cost += address_cost (addr, mem_mode, as, speed);
4545 if (parts.symbol != NULL_TREE)
4546 cost.complexity += 1;
4547 /* Don't increase the complexity of adding a scaled index if it's
4548 the only kind of index that the target allows. */
4549 if (parts.step != NULL_TREE && ok_without_ratio_p)
4550 cost.complexity += 1;
4551 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4552 cost.complexity += 1;
4553 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4554 cost.complexity += 1;
4556 return cost;
4559 /* Scale (multiply) the computed COST (except the scratch part, which should
4560 be hoisted out of the loop) by header->frequency / AT->frequency, which
4561 makes the expected cost more accurate. */
4563 static comp_cost
4564 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4566 if (data->speed
4567 && data->current_loop->header->count.to_frequency (cfun) > 0)
4569 basic_block bb = gimple_bb (at);
4570 gcc_assert (cost.scratch <= cost.cost);
4571 int scale_factor = (int)(intptr_t) bb->aux;
4572 if (scale_factor == 1)
4573 return cost;
4575 int64_t scaled_cost
4576 = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4578 if (dump_file && (dump_flags & TDF_DETAILS))
4579 fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4580 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4581 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4583 cost.cost = scaled_cost;
4586 return cost;
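/* A small numeric sketch: with COST.cost == 10, COST.scratch == 2 and a
   block scale factor of 3 (stored in bb->aux), the scaled cost becomes
   2 + (10 - 2) * 3 == 26; the scratch part is kept unscaled because it is
   expected to be hoisted out of the loop.  */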
4589 /* Determines the cost of the computation by which USE is expressed
4590 from induction variable CAND. If ADDRESS_P is true, we just need
4591 to create an address from it, otherwise we want to get it into a
4592 register. A set of invariants we depend on is stored in INV_VARS.
4593 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4594 addressing is likely. If INV_EXPR is nonnull, record invariant
4595 expr entry in it. */
4597 static comp_cost
4598 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4599 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4600 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4602 gimple *at = use->stmt;
4603 tree ubase = use->iv->base, cbase = cand->iv->base;
4604 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4605 tree comp_inv = NULL_TREE;
4606 HOST_WIDE_INT ratio, aratio;
4607 comp_cost cost;
4608 widest_int rat;
4609 aff_tree aff_inv, aff_var;
4610 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4612 if (inv_vars)
4613 *inv_vars = NULL;
4614 if (can_autoinc)
4615 *can_autoinc = false;
4616 if (inv_expr)
4617 *inv_expr = NULL;
4619 /* Check if we have enough precision to express the values of use. */
4620 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4621 return infinite_cost;
4623 if (address_p
4624 || (use->iv->base_object
4625 && cand->iv->base_object
4626 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4627 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4629 /* Do not try to express address of an object with computation based
4630 on address of a different object. This may cause problems in rtl
4631 level alias analysis (that does not expect this to be happening,
4632 as this is illegal in C), and would be unlikely to be useful
4633 anyway. */
4634 if (use->iv->base_object
4635 && cand->iv->base_object
4636 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4637 return infinite_cost;
4640 if (!get_computation_aff_1 (data->current_loop, at, use,
4641 cand, &aff_inv, &aff_var, &rat)
4642 || !wi::fits_shwi_p (rat))
4643 return infinite_cost;
4645 ratio = rat.to_shwi ();
4646 if (address_p)
4648 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4649 inv_vars, inv_expr, can_autoinc, speed);
4650 return get_scaled_computation_cost_at (data, at, cost);
4653 bool simple_inv = (aff_combination_const_p (&aff_inv)
4654 || aff_combination_singleton_var_p (&aff_inv));
4655 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4656 aff_combination_convert (&aff_inv, signed_type);
4657 if (!aff_combination_zero_p (&aff_inv))
4658 comp_inv = aff_combination_to_tree (&aff_inv);
4660 cost = force_var_cost (data, comp_inv, inv_vars);
4661 if (comp_inv && inv_expr && !simple_inv)
4663 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4664 /* Clear depends on. */
4665 if (*inv_expr != NULL && inv_vars && *inv_vars)
4666 bitmap_clear (*inv_vars);
4668 cost.cost = adjust_setup_cost (data, cost.cost);
4669 /* Record setup cost in scratch field. */
4670 cost.scratch = cost.cost;
4672 /* Cost of constant integer can be covered when adding invariant part to
4673 variant part. */
4674 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4675 cost = no_cost;
4677 /* Need type narrowing to represent use with cand. */
4678 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4680 machine_mode outer_mode = TYPE_MODE (utype);
4681 machine_mode inner_mode = TYPE_MODE (ctype);
4682 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4685 /* Turn a + i * (-c) into a - i * c. */
4686 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4687 aratio = -ratio;
4688 else
4689 aratio = ratio;
4691 if (ratio != 1)
4692 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4694 /* TODO: We may also need to check if we can compute a + i * 4 in one
4695 instruction. */
4696 /* Need to add up the invariant and variant parts. */
4697 if (comp_inv && !integer_zerop (comp_inv))
4698 cost += add_cost (speed, TYPE_MODE (utype));
4700 return get_scaled_computation_cost_at (data, at, cost);
4703 /* Determines cost of computing the use in GROUP with CAND in a generic
4704 expression. */
4706 static bool
4707 determine_group_iv_cost_generic (struct ivopts_data *data,
4708 struct iv_group *group, struct iv_cand *cand)
4710 comp_cost cost;
4711 iv_inv_expr_ent *inv_expr = NULL;
4712 bitmap inv_vars = NULL, inv_exprs = NULL;
4713 struct iv_use *use = group->vuses[0];
4715 /* The simple case first -- if we need to express value of the preserved
4716 original biv, the cost is 0. This also prevents us from counting the
4717 cost of increment twice -- once at this use and once in the cost of
4718 the candidate. */
4719 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4720 cost = no_cost;
4721 else
4722 cost = get_computation_cost (data, use, cand, false,
4723 &inv_vars, NULL, &inv_expr);
4725 if (inv_expr)
4727 inv_exprs = BITMAP_ALLOC (NULL);
4728 bitmap_set_bit (inv_exprs, inv_expr->id);
4730 set_group_iv_cost (data, group, cand, cost, inv_vars,
4731 NULL_TREE, ERROR_MARK, inv_exprs);
4732 return !cost.infinite_cost_p ();
4735 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4737 static bool
4738 determine_group_iv_cost_address (struct ivopts_data *data,
4739 struct iv_group *group, struct iv_cand *cand)
4741 unsigned i;
4742 bitmap inv_vars = NULL, inv_exprs = NULL;
4743 bool can_autoinc;
4744 iv_inv_expr_ent *inv_expr = NULL;
4745 struct iv_use *use = group->vuses[0];
4746 comp_cost sum_cost = no_cost, cost;
4748 cost = get_computation_cost (data, use, cand, true,
4749 &inv_vars, &can_autoinc, &inv_expr);
4751 if (inv_expr)
4753 inv_exprs = BITMAP_ALLOC (NULL);
4754 bitmap_set_bit (inv_exprs, inv_expr->id);
4756 sum_cost = cost;
4757 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4759 if (can_autoinc)
4760 sum_cost -= cand->cost_step;
4761 /* If we generated the candidate solely for exploiting autoincrement
4762 opportunities, and it turns out it can't be used, set the cost to
4763 infinity to make sure we ignore it. */
4764 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4765 sum_cost = infinite_cost;
4768 /* Uses in a group can share setup code, so only add setup cost once. */
4769 cost -= cost.scratch;
4770 /* Compute and add costs for the rest of the uses in this group. */
4771 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4773 struct iv_use *next = group->vuses[i];
4775 /* TODO: We could skip computing cost for sub iv_use when it has the
4776 same cost as the first iv_use, but the cost really depends on the
4777 offset and where the iv_use is. */
4778 cost = get_computation_cost (data, next, cand, true,
4779 NULL, &can_autoinc, &inv_expr);
4780 if (inv_expr)
4782 if (!inv_exprs)
4783 inv_exprs = BITMAP_ALLOC (NULL);
4785 bitmap_set_bit (inv_exprs, inv_expr->id);
4787 sum_cost += cost;
4789 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4790 NULL_TREE, ERROR_MARK, inv_exprs);
4792 return !sum_cost.infinite_cost_p ();
4795 /* Computes value of candidate CAND at position AT in iteration NITER, and
4796 stores it to VAL. */
4798 static void
4799 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4800 aff_tree *val)
4802 aff_tree step, delta, nit;
4803 struct iv *iv = cand->iv;
4804 tree type = TREE_TYPE (iv->base);
4805 tree steptype;
4806 if (POINTER_TYPE_P (type))
4807 steptype = sizetype;
4808 else
4809 steptype = unsigned_type_for (type);
4811 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4812 aff_combination_convert (&step, steptype);
4813 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4814 aff_combination_convert (&nit, steptype);
4815 aff_combination_mult (&nit, &step, &delta);
4816 if (stmt_after_increment (loop, cand, at))
4817 aff_combination_add (&delta, &step);
4819 tree_to_aff_combination (iv->base, type, val);
4820 if (!POINTER_TYPE_P (type))
4821 aff_combination_convert (val, steptype);
4822 aff_combination_add (val, &delta);
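/* In other words, the value computed is BASE + NITER * STEP, plus one extra
   STEP when AT comes after the increment of the candidate, evaluated in an
   unsigned (or sizetype) type to sidestep overflow.  E.g. with BASE == 16,
   STEP == 4 and NITER == 5 the value is 36 before the increment and 40
   after it.  */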
4825 /* Returns period of induction variable iv. */
4827 static tree
4828 iv_period (struct iv *iv)
4830 tree step = iv->step, period, type;
4831 tree pow2div;
4833 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4835 type = unsigned_type_for (TREE_TYPE (step));
4836 /* The period of the iv is lcm (step, type_range) / step - 1,
4837 i.e., N * type_range / step - 1. Since the type range is a power
4838 of two, N == step >> num_of_ending_zeros_binary (step),
4839 so the final result is
4841 (type_range >> num_of_ending_zeros_binary (step)) - 1
4844 pow2div = num_ending_zeros (step);
4846 period = build_low_bits_mask (type,
4847 (TYPE_PRECISION (type)
4848 - tree_to_uhwi (pow2div)));
4850 return period;
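/* For example, for a 32-bit unsigned iv with STEP == 4, pow2div == 2 and the
   period is the low-bits mask of 32 - 2 == 30 bits, i.e. 0x3fffffff: the iv
   can be incremented that many times before it may wrap back to a value it
   has already taken.  */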
4853 /* Returns the comparison operator used when eliminating the iv USE. */
4855 static enum tree_code
4856 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4858 struct loop *loop = data->current_loop;
4859 basic_block ex_bb;
4860 edge exit;
4862 ex_bb = gimple_bb (use->stmt);
4863 exit = EDGE_SUCC (ex_bb, 0);
4864 if (flow_bb_inside_loop_p (loop, exit->dest))
4865 exit = EDGE_SUCC (ex_bb, 1);
4867 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4870 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4871 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4872 calculation is performed in non-wrapping type.
4874 TODO: More generally, we could test for the situation that
4875 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4876 This would require knowing the sign of OFFSET. */
4878 static bool
4879 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4881 enum tree_code code;
4882 tree e1, e2;
4883 aff_tree aff_e1, aff_e2, aff_offset;
4885 if (!nowrap_type_p (TREE_TYPE (base)))
4886 return false;
4888 base = expand_simple_operations (base);
4890 if (TREE_CODE (base) == SSA_NAME)
4892 gimple *stmt = SSA_NAME_DEF_STMT (base);
4894 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4895 return false;
4897 code = gimple_assign_rhs_code (stmt);
4898 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4899 return false;
4901 e1 = gimple_assign_rhs1 (stmt);
4902 e2 = gimple_assign_rhs2 (stmt);
4904 else
4906 code = TREE_CODE (base);
4907 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4908 return false;
4909 e1 = TREE_OPERAND (base, 0);
4910 e2 = TREE_OPERAND (base, 1);
4913 /* Use affine expansion as deeper inspection to prove the equality. */
4914 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4915 &aff_e2, &data->name_expansion_cache);
4916 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4917 &aff_offset, &data->name_expansion_cache);
4918 aff_combination_scale (&aff_offset, -1);
4919 switch (code)
4921 case PLUS_EXPR:
4922 aff_combination_add (&aff_e2, &aff_offset);
4923 if (aff_combination_zero_p (&aff_e2))
4924 return true;
4926 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4927 &aff_e1, &data->name_expansion_cache);
4928 aff_combination_add (&aff_e1, &aff_offset);
4929 return aff_combination_zero_p (&aff_e1);
4931 case POINTER_PLUS_EXPR:
4932 aff_combination_add (&aff_e2, &aff_offset);
4933 return aff_combination_zero_p (&aff_e2);
4935 default:
4936 return false;
4940 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4941 comparison with CAND. NITER describes the number of iterations of
4942 the loop. If successful, the comparison in COMP_P is altered accordingly.
4944 We aim to handle the following situation:
4946 sometype *base, *p;
4947 int a, b, i;
4949 i = a;
4950 p = p_0 = base + a;
4952 do
4953 {
4954 bla (*p);
4955 p++;
4956 i++;
4957 }
4958 while (i < b);
4960 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4961 We aim to optimize this to
4963 p = p_0 = base + a;
4964 do
4965 {
4966 bla (*p);
4967 p++;
4968 }
4969 while (p < p_0 - a + b);
4971 This preserves the correctness, since the pointer arithmetic does not
4972 overflow. More precisely:
4974 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4975 overflow in computing it or the values of p.
4976 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4977 overflow. To prove this, we use the fact that p_0 = base + a. */
4979 static bool
4980 iv_elimination_compare_lt (struct ivopts_data *data,
4981 struct iv_cand *cand, enum tree_code *comp_p,
4982 struct tree_niter_desc *niter)
4984 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4985 struct aff_tree nit, tmpa, tmpb;
4986 enum tree_code comp;
4987 HOST_WIDE_INT step;
4989 /* We need to know that the candidate induction variable does not overflow.
4990 While more complex analysis may be used to prove this, for now just
4991 check that the variable appears in the original program and that it
4992 is computed in a type that guarantees no overflows. */
4993 cand_type = TREE_TYPE (cand->iv->base);
4994 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4995 return false;
4997 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4998 the calculation of the BOUND could overflow, making the comparison
4999 invalid. */
5000 if (!data->loop_single_exit_p)
5001 return false;
5003 /* We need to be able to decide whether candidate is increasing or decreasing
5004 in order to choose the right comparison operator. */
5005 if (!cst_and_fits_in_hwi (cand->iv->step))
5006 return false;
5007 step = int_cst_value (cand->iv->step);
5009 /* Check that the number of iterations matches the expected pattern:
5010 a + 1 > b ? 0 : b - a - 1. */
5011 mbz = niter->may_be_zero;
5012 if (TREE_CODE (mbz) == GT_EXPR)
5014 /* Handle a + 1 > b. */
5015 tree op0 = TREE_OPERAND (mbz, 0);
5016 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5018 a = TREE_OPERAND (op0, 0);
5019 b = TREE_OPERAND (mbz, 1);
5021 else
5022 return false;
5024 else if (TREE_CODE (mbz) == LT_EXPR)
5026 tree op1 = TREE_OPERAND (mbz, 1);
5028 /* Handle b < a + 1. */
5029 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5031 a = TREE_OPERAND (op1, 0);
5032 b = TREE_OPERAND (mbz, 0);
5034 else
5035 return false;
5037 else
5038 return false;
5040 /* Expected number of iterations is B - A - 1. Check that it matches
5041 the actual number, i.e., that B - A - NITER = 1. */
5042 tree_to_aff_combination (niter->niter, nit_type, &nit);
5043 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5044 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5045 aff_combination_scale (&nit, -1);
5046 aff_combination_scale (&tmpa, -1);
5047 aff_combination_add (&tmpb, &tmpa);
5048 aff_combination_add (&tmpb, &nit);
5049 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5050 return false;
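/* A quick sanity example for the check above: with a == 2 and b == 7 the
   loop runs while i < b, so NITER == b - a - 1 == 4, and indeed
   b - a - NITER == 7 - 2 - 4 == 1, i.e. the affine combination built above
   reduces to the constant 1 with no remaining symbolic terms.  */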
5052 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5053 overflow. */
5054 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5055 cand->iv->step,
5056 fold_convert (TREE_TYPE (cand->iv->step), a));
5057 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5058 return false;
5060 /* Determine the new comparison operator. */
5061 comp = step < 0 ? GT_EXPR : LT_EXPR;
5062 if (*comp_p == NE_EXPR)
5063 *comp_p = comp;
5064 else if (*comp_p == EQ_EXPR)
5065 *comp_p = invert_tree_comparison (comp, false);
5066 else
5067 gcc_unreachable ();
5069 return true;
5072 /* Check whether it is possible to express the condition in USE by comparison
5073 of candidate CAND. If so, store the value compared with to BOUND, and the
5074 comparison operator to COMP. */
5076 static bool
5077 may_eliminate_iv (struct ivopts_data *data,
5078 struct iv_use *use, struct iv_cand *cand, tree *bound,
5079 enum tree_code *comp)
5081 basic_block ex_bb;
5082 edge exit;
5083 tree period;
5084 struct loop *loop = data->current_loop;
5085 aff_tree bnd;
5086 struct tree_niter_desc *desc = NULL;
5088 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5089 return false;
5091 /* For now works only for exits that dominate the loop latch.
5092 TODO: extend to other conditions inside loop body. */
5093 ex_bb = gimple_bb (use->stmt);
5094 if (use->stmt != last_stmt (ex_bb)
5095 || gimple_code (use->stmt) != GIMPLE_COND
5096 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5097 return false;
5099 exit = EDGE_SUCC (ex_bb, 0);
5100 if (flow_bb_inside_loop_p (loop, exit->dest))
5101 exit = EDGE_SUCC (ex_bb, 1);
5102 if (flow_bb_inside_loop_p (loop, exit->dest))
5103 return false;
5105 desc = niter_for_exit (data, exit);
5106 if (!desc)
5107 return false;
5109 /* Determine whether we can use the variable to test the exit condition.
5110 This is the case iff the period of the induction variable is greater
5111 than the number of iterations for which the exit condition is true. */
5112 period = iv_period (cand->iv);
5114 /* If the number of iterations is constant, compare against it directly. */
5115 if (TREE_CODE (desc->niter) == INTEGER_CST)
5117 /* See cand_value_at. */
5118 if (stmt_after_increment (loop, cand, use->stmt))
5120 if (!tree_int_cst_lt (desc->niter, period))
5121 return false;
5123 else
5125 if (tree_int_cst_lt (period, desc->niter))
5126 return false;
5130 /* If not, and if this is the only possible exit of the loop, see whether
5131 we can get a conservative estimate on the number of iterations of the
5132 entire loop and compare against that instead. */
5133 else
5135 widest_int period_value, max_niter;
5137 max_niter = desc->max;
5138 if (stmt_after_increment (loop, cand, use->stmt))
5139 max_niter += 1;
5140 period_value = wi::to_widest (period);
5141 if (wi::gtu_p (max_niter, period_value))
5143 /* See if we can take advantage of inferred loop bound
5144 information. */
5145 if (data->loop_single_exit_p)
5147 if (!max_loop_iterations (loop, &max_niter))
5148 return false;
5149 /* The loop bound is already adjusted by adding 1. */
5150 if (wi::gtu_p (max_niter, period_value))
5151 return false;
5153 else
5154 return false;
5158 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5160 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5161 aff_combination_to_tree (&bnd));
5162 *comp = iv_elimination_compare (data, use);
5164 /* It is unlikely that computing the number of iterations using division
5165 would be more profitable than keeping the original induction variable. */
5166 if (expression_expensive_p (*bound))
5167 return false;
5169 /* Sometimes, it is possible to handle the situation that the number of
5170 iterations may be zero unless additional assumptions hold, by using <
5171 instead of != in the exit condition.
5173 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5174 base the exit condition on it. However, that is often too
5175 expensive. */
5176 if (!integer_zerop (desc->may_be_zero))
5177 return iv_elimination_compare_lt (data, cand, comp, desc);
5179 return true;
5182 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5183 be copied, if it is used in the loop body and DATA->body_includes_call. */
5185 static int
5186 parm_decl_cost (struct ivopts_data *data, tree bound)
5188 tree sbound = bound;
5189 STRIP_NOPS (sbound);
5191 if (TREE_CODE (sbound) == SSA_NAME
5192 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5193 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5194 && data->body_includes_call)
5195 return COSTS_N_INSNS (1);
5197 return 0;
5200 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5202 static bool
5203 determine_group_iv_cost_cond (struct ivopts_data *data,
5204 struct iv_group *group, struct iv_cand *cand)
5206 tree bound = NULL_TREE;
5207 struct iv *cmp_iv;
5208 bitmap inv_exprs = NULL;
5209 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5210 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5211 enum comp_iv_rewrite rewrite_type;
5212 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5213 tree *control_var, *bound_cst;
5214 enum tree_code comp = ERROR_MARK;
5215 struct iv_use *use = group->vuses[0];
5217 /* Extract condition operands. */
5218 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5219 &bound_cst, NULL, &cmp_iv);
5220 gcc_assert (rewrite_type != COMP_IV_NA);
5222 /* Try iv elimination. */
5223 if (rewrite_type == COMP_IV_ELIM
5224 && may_eliminate_iv (data, use, cand, &bound, &comp))
5226 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5227 if (elim_cost.cost == 0)
5228 elim_cost.cost = parm_decl_cost (data, bound);
5229 else if (TREE_CODE (bound) == INTEGER_CST)
5230 elim_cost.cost = 0;
5231 /* If we replace a loop condition 'i < n' with 'p < base + n',
5232 inv_vars_elim will have 'base' and 'n' set, which implies that both
5233 'base' and 'n' will be live during the loop. More likely,
5234 'base + n' will be loop invariant, resulting in only one live value
5235 during the loop. So in that case we clear inv_vars_elim and set
5236 inv_expr_elim instead. */
5237 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5239 inv_expr_elim = get_loop_invariant_expr (data, bound);
5240 bitmap_clear (inv_vars_elim);
5242 /* The bound is a loop invariant, so it will be only computed
5243 once. */
5244 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5247 /* When the condition is a comparison of the candidate IV against
5248 zero, prefer this IV.
5250 TODO: The constant that we're subtracting from the cost should
5251 be target-dependent. This information should be added to the
5252 target costs for each backend. */
5253 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5254 && integer_zerop (*bound_cst)
5255 && (operand_equal_p (*control_var, cand->var_after, 0)
5256 || operand_equal_p (*control_var, cand->var_before, 0)))
5257 elim_cost -= 1;
5259 express_cost = get_computation_cost (data, use, cand, false,
5260 &inv_vars_express, NULL,
5261 &inv_expr_express);
5262 if (cmp_iv != NULL)
5263 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5265 /* Count the cost of the original bound as well. */
5266 bound_cost = force_var_cost (data, *bound_cst, NULL);
5267 if (bound_cost.cost == 0)
5268 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5269 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5270 bound_cost.cost = 0;
5271 express_cost += bound_cost;
5273 /* Choose the better approach, preferring the eliminated IV. */
5274 if (elim_cost <= express_cost)
5276 cost = elim_cost;
5277 inv_vars = inv_vars_elim;
5278 inv_vars_elim = NULL;
5279 inv_expr = inv_expr_elim;
5281 else
5283 cost = express_cost;
5284 inv_vars = inv_vars_express;
5285 inv_vars_express = NULL;
5286 bound = NULL_TREE;
5287 comp = ERROR_MARK;
5288 inv_expr = inv_expr_express;
5291 if (inv_expr)
5293 inv_exprs = BITMAP_ALLOC (NULL);
5294 bitmap_set_bit (inv_exprs, inv_expr->id);
5296 set_group_iv_cost (data, group, cand, cost,
5297 inv_vars, bound, comp, inv_exprs);
5299 if (inv_vars_elim)
5300 BITMAP_FREE (inv_vars_elim);
5301 if (inv_vars_express)
5302 BITMAP_FREE (inv_vars_express);
5304 return !cost.infinite_cost_p ();
5307 /* Determines cost of computing uses in GROUP with CAND. Returns false
5308 if the group cannot be represented with CAND. */
5310 static bool
5311 determine_group_iv_cost (struct ivopts_data *data,
5312 struct iv_group *group, struct iv_cand *cand)
5314 switch (group->type)
5316 case USE_NONLINEAR_EXPR:
5317 return determine_group_iv_cost_generic (data, group, cand);
5319 case USE_REF_ADDRESS:
5320 case USE_PTR_ADDRESS:
5321 return determine_group_iv_cost_address (data, group, cand);
5323 case USE_COMPARE:
5324 return determine_group_iv_cost_cond (data, group, cand);
5326 default:
5327 gcc_unreachable ();
5331 /* Return true if get_computation_cost indicates that autoincrement is
5332 a possibility for the pair of USE and CAND, false otherwise. */
5334 static bool
5335 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5336 struct iv_cand *cand)
5338 if (!address_p (use->type))
5339 return false;
5341 bool can_autoinc = false;
5342 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5343 return can_autoinc;
5346 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5347 use that allows autoincrement, and set their AINC_USE if possible. */
5349 static void
5350 set_autoinc_for_original_candidates (struct ivopts_data *data)
5352 unsigned i, j;
5354 for (i = 0; i < data->vcands.length (); i++)
5356 struct iv_cand *cand = data->vcands[i];
5357 struct iv_use *closest_before = NULL;
5358 struct iv_use *closest_after = NULL;
5359 if (cand->pos != IP_ORIGINAL)
5360 continue;
5362 for (j = 0; j < data->vgroups.length (); j++)
5364 struct iv_group *group = data->vgroups[j];
5365 struct iv_use *use = group->vuses[0];
5366 unsigned uid = gimple_uid (use->stmt);
5368 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5369 continue;
5371 if (uid < gimple_uid (cand->incremented_at)
5372 && (closest_before == NULL
5373 || uid > gimple_uid (closest_before->stmt)))
5374 closest_before = use;
5376 if (uid > gimple_uid (cand->incremented_at)
5377 && (closest_after == NULL
5378 || uid < gimple_uid (closest_after->stmt)))
5379 closest_after = use;
5382 if (closest_before != NULL
5383 && autoinc_possible_for_pair (data, closest_before, cand))
5384 cand->ainc_use = closest_before;
5385 else if (closest_after != NULL
5386 && autoinc_possible_for_pair (data, closest_after, cand))
5387 cand->ainc_use = closest_after;
5391 /* Relate compare use with all candidates. */
5393 static void
5394 relate_compare_use_with_all_cands (struct ivopts_data *data)
5396 unsigned i, count = data->vcands.length ();
5397 for (i = 0; i < data->vgroups.length (); i++)
5399 struct iv_group *group = data->vgroups[i];
5401 if (group->type == USE_COMPARE)
5402 bitmap_set_range (group->related_cands, 0, count);
5406 /* Finds the candidates for the induction variables. */
5408 static void
5409 find_iv_candidates (struct ivopts_data *data)
5411 /* Add commonly used ivs. */
5412 add_standard_iv_candidates (data);
5414 /* Add old induction variables. */
5415 add_iv_candidate_for_bivs (data);
5417 /* Add induction variables derived from uses. */
5418 add_iv_candidate_for_groups (data);
5420 set_autoinc_for_original_candidates (data);
5422 /* Record the important candidates. */
5423 record_important_candidates (data);
5425 /* Relate compare iv_use with all candidates. */
5426 if (!data->consider_all_candidates)
5427 relate_compare_use_with_all_cands (data);
5429 if (dump_file && (dump_flags & TDF_DETAILS))
5431 unsigned i;
5433 fprintf (dump_file, "\n<Important Candidates>:\t");
5434 for (i = 0; i < data->vcands.length (); i++)
5435 if (data->vcands[i]->important)
5436 fprintf (dump_file, " %d,", data->vcands[i]->id);
5437 fprintf (dump_file, "\n");
5439 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5440 for (i = 0; i < data->vgroups.length (); i++)
5442 struct iv_group *group = data->vgroups[i];
5444 if (group->related_cands)
5446 fprintf (dump_file, " Group %d:\t", group->id);
5447 dump_bitmap (dump_file, group->related_cands);
5450 fprintf (dump_file, "\n");
5454 /* Determines the costs of computing the uses with each iv candidate. */
5456 static void
5457 determine_group_iv_costs (struct ivopts_data *data)
5459 unsigned i, j;
5460 struct iv_cand *cand;
5461 struct iv_group *group;
5462 bitmap to_clear = BITMAP_ALLOC (NULL);
5464 alloc_use_cost_map (data);
5466 for (i = 0; i < data->vgroups.length (); i++)
5468 group = data->vgroups[i];
5470 if (data->consider_all_candidates)
5472 for (j = 0; j < data->vcands.length (); j++)
5474 cand = data->vcands[j];
5475 determine_group_iv_cost (data, group, cand);
5478 else
5480 bitmap_iterator bi;
5482 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5484 cand = data->vcands[j];
5485 if (!determine_group_iv_cost (data, group, cand))
5486 bitmap_set_bit (to_clear, j);
5489 /* Remove the candidates for which the cost is infinite from
5490 the list of related candidates. */
5491 bitmap_and_compl_into (group->related_cands, to_clear);
5492 bitmap_clear (to_clear);
5496 BITMAP_FREE (to_clear);
5498 if (dump_file && (dump_flags & TDF_DETAILS))
5500 bitmap_iterator bi;
5502 /* Dump invariant variables. */
5503 fprintf (dump_file, "\n<Invariant Vars>:\n");
5504 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5506 struct version_info *info = ver_info (data, i);
5507 if (info->inv_id)
5509 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5510 print_generic_expr (dump_file, info->name, TDF_SLIM);
5511 fprintf (dump_file, "%s\n",
5512 info->has_nonlin_use ? "" : "\t(eliminable)");
5516 /* Dump invariant expressions. */
5517 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5518 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5520 for (hash_table<iv_inv_expr_hasher>::iterator it
5521 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5522 ++it)
5523 list.safe_push (*it);
5525 list.qsort (sort_iv_inv_expr_ent);
5527 for (i = 0; i < list.length (); ++i)
5529 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5530 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5531 fprintf (dump_file, "\n");
5534 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5536 for (i = 0; i < data->vgroups.length (); i++)
5538 group = data->vgroups[i];
5540 fprintf (dump_file, "Group %d:\n", i);
5541 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5542 for (j = 0; j < group->n_map_members; j++)
5544 if (!group->cost_map[j].cand
5545 || group->cost_map[j].cost.infinite_cost_p ())
5546 continue;
5548 fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
5549 group->cost_map[j].cand->id,
5550 group->cost_map[j].cost.cost,
5551 group->cost_map[j].cost.complexity);
5552 if (!group->cost_map[j].inv_exprs
5553 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5554 fprintf (dump_file, "NIL;\t");
5555 else
5556 bitmap_print (dump_file,
5557 group->cost_map[j].inv_exprs, "", ";\t");
5558 if (!group->cost_map[j].inv_vars
5559 || bitmap_empty_p (group->cost_map[j].inv_vars))
5560 fprintf (dump_file, "NIL;\n");
5561 else
5562 bitmap_print (dump_file,
5563 group->cost_map[j].inv_vars, "", "\n");
5566 fprintf (dump_file, "\n");
5568 fprintf (dump_file, "\n");
5572 /* Determines cost of the candidate CAND. */
5574 static void
5575 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5577 comp_cost cost_base;
5578 int64_t cost, cost_step;
5579 tree base;
5581 gcc_assert (cand->iv != NULL);
5583 /* There are two costs associated with the candidate -- its increment
5584 and its initialization. The second is almost negligible for any loop
5585 that rolls enough, so we give it only a small weight. */
5587 base = cand->iv->base;
5588 cost_base = force_var_cost (data, base, NULL);
5589 /* It will be exceptional that the iv register happens to be initialized with
5590 the proper value at no cost. In general, there will at least be a regcopy
5591 or a const set. */
5592 if (cost_base.cost == 0)
5593 cost_base.cost = COSTS_N_INSNS (1);
5594 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5596 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5598 /* Prefer the original ivs unless we may gain something by replacing them.
5599 The reason is to make debugging simpler, so this is not relevant for
5600 artificial ivs created by other optimization passes. */
5601 if (cand->pos != IP_ORIGINAL
5602 || !SSA_NAME_VAR (cand->var_before)
5603 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5604 cost++;
5606 /* Prefer not to insert statements into latch unless there are some
5607 already (so that we do not create unnecessary jumps). */
5608 if (cand->pos == IP_END
5609 && empty_block_p (ip_end_pos (data->current_loop)))
5610 cost++;
5612 cand->cost = cost;
5613 cand->cost_step = cost_step;
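/* As a rough illustration: if forcing the base costs COSTS_N_INSNS (1),
   the step addition also costs about one insn and the loop is expected to
   roll often, adjust_setup_cost makes the base contribution negligible, so
   the candidate cost is dominated by cost_step, plus 1 for a candidate that
   is not an original user variable.  */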
5616 /* Determines costs of computation of the candidates. */
5618 static void
5619 determine_iv_costs (struct ivopts_data *data)
5621 unsigned i;
5623 if (dump_file && (dump_flags & TDF_DETAILS))
5625 fprintf (dump_file, "<Candidate Costs>:\n");
5626 fprintf (dump_file, " cand\tcost\n");
5629 for (i = 0; i < data->vcands.length (); i++)
5631 struct iv_cand *cand = data->vcands[i];
5633 determine_iv_cost (data, cand);
5635 if (dump_file && (dump_flags & TDF_DETAILS))
5636 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5639 if (dump_file && (dump_flags & TDF_DETAILS))
5640 fprintf (dump_file, "\n");
5643 /* Estimate register pressure for a loop having N_INVS invariants and N_CANDS
5644 induction variables. Note N_INVS includes both invariant variables and
5645 invariant expressions. */
5647 static unsigned
5648 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5649 unsigned n_cands)
5651 unsigned cost;
5652 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5653 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5654 bool speed = data->speed;
5656 /* If there is a call in the loop body, the call-clobbered registers
5657 are not available for loop invariants. */
5658 if (data->body_includes_call)
5659 available_regs = available_regs - target_clobbered_regs;
5661 /* If we have enough registers. */
5662 if (regs_needed + target_res_regs < available_regs)
5663 cost = n_new;
5664 /* If close to running out of registers, try to preserve them. */
5665 else if (regs_needed <= available_regs)
5666 cost = target_reg_cost [speed] * regs_needed;
5667 /* If the registers needed exceed the registers available but the number
5668 of candidates alone does not, penalize the extra registers using target_spill_cost. */
5669 else if (n_cands <= available_regs)
5670 cost = target_reg_cost [speed] * available_regs
5671 + target_spill_cost [speed] * (regs_needed - available_regs);
5672 /* If even the number of candidates exceeds the available registers, we
5673 penalize the extra candidate registers using target_spill_cost * 2,
5674 because it is more expensive to spill an induction variable than an invariant. */
5675 else
5676 cost = target_reg_cost [speed] * available_regs
5677 + target_spill_cost [speed] * (n_cands - available_regs) * 2
5678 + target_spill_cost [speed] * (regs_needed - n_cands);
5680 /* Finally, add the number of candidates, so that we prefer eliminating
5681 induction variables if possible. */
5682 return cost + n_cands;
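/* A worked example with made-up target parameters: regs_used == 6,
   N_INVS == 3 and N_CANDS == 2 give regs_needed == 11.  If only 10 registers
   are available (after subtracting call-clobbered ones) the first two tests
   fail, but n_cands <= available_regs holds, so with target_reg_cost == 2
   and target_spill_cost == 8 the cost is 2 * 10 + 8 * (11 - 10) == 28, and
   28 + n_cands == 30 is returned.  */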
5685 /* For each size of the induction variable set determine the penalty. */
5687 static void
5688 determine_set_costs (struct ivopts_data *data)
5690 unsigned j, n;
5691 gphi *phi;
5692 gphi_iterator psi;
5693 tree op;
5694 struct loop *loop = data->current_loop;
5695 bitmap_iterator bi;
5697 if (dump_file && (dump_flags & TDF_DETAILS))
5699 fprintf (dump_file, "<Global Costs>:\n");
5700 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5701 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5702 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5703 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5706 n = 0;
5707 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5709 phi = psi.phi ();
5710 op = PHI_RESULT (phi);
5712 if (virtual_operand_p (op))
5713 continue;
5715 if (get_iv (data, op))
5716 continue;
5718 if (!POINTER_TYPE_P (TREE_TYPE (op))
5719 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5720 continue;
5722 n++;
5725 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5727 struct version_info *info = ver_info (data, j);
5729 if (info->inv_id && info->has_nonlin_use)
5730 n++;
5733 data->regs_used = n;
5734 if (dump_file && (dump_flags & TDF_DETAILS))
5735 fprintf (dump_file, " regs_used %d\n", n);
5737 if (dump_file && (dump_flags & TDF_DETAILS))
5739 fprintf (dump_file, " cost for size:\n");
5740 fprintf (dump_file, " ivs\tcost\n");
5741 for (j = 0; j <= 2 * target_avail_regs; j++)
5742 fprintf (dump_file, " %d\t%d\n", j,
5743 ivopts_estimate_reg_pressure (data, 0, j));
5744 fprintf (dump_file, "\n");
5748 /* Returns true if A is a cheaper cost pair than B. */
5750 static bool
5751 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5753 if (!a)
5754 return false;
5756 if (!b)
5757 return true;
5759 if (a->cost < b->cost)
5760 return true;
5762 if (b->cost < a->cost)
5763 return false;
5765 /* In case the costs are the same, prefer the cheaper candidate. */
5766 if (a->cand->cost < b->cand->cost)
5767 return true;
5769 return false;
5772 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
5773 for more expensive, equal and cheaper respectively. */
5775 static int
5776 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5778 if (cheaper_cost_pair (a, b))
5779 return -1;
5780 if (cheaper_cost_pair (b, a))
5781 return 1;
5783 return 0;
5786 /* Returns the cost pair (candidate) by which GROUP is expressed in IVS. */
5788 static struct cost_pair *
5789 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5791 return ivs->cand_for_group[group->id];
5794 /* Computes the cost field of IVS structure. */
5796 static void
5797 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5799 comp_cost cost = ivs->cand_use_cost;
5801 cost += ivs->cand_cost;
5802 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5803 ivs->cost = cost;
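/* In other words, the total cost of an assignment is the sum of the chosen
   per-group use costs, plus the sum of the costs of the candidates in use,
   plus the register-pressure estimate for n_invs invariants and n_cands
   candidates.  */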
5806 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
5807 and IVS. */
5809 static void
5810 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5812 bitmap_iterator bi;
5813 unsigned iid;
5815 if (!invs)
5816 return;
5818 gcc_assert (n_inv_uses != NULL);
5819 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5821 n_inv_uses[iid]--;
5822 if (n_inv_uses[iid] == 0)
5823 ivs->n_invs--;
5827 /* Set USE not to be expressed by any candidate in IVS. */
5829 static void
5830 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5831 struct iv_group *group)
5833 unsigned gid = group->id, cid;
5834 struct cost_pair *cp;
5836 cp = ivs->cand_for_group[gid];
5837 if (!cp)
5838 return;
5839 cid = cp->cand->id;
5841 ivs->bad_groups++;
5842 ivs->cand_for_group[gid] = NULL;
5843 ivs->n_cand_uses[cid]--;
5845 if (ivs->n_cand_uses[cid] == 0)
5847 bitmap_clear_bit (ivs->cands, cid);
5848 ivs->n_cands--;
5849 ivs->cand_cost -= cp->cand->cost;
5850 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5851 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5854 ivs->cand_use_cost -= cp->cost;
5855 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5856 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5857 iv_ca_recount_cost (data, ivs);
5860 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
5861 IVS. */
5863 static void
5864 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5866 bitmap_iterator bi;
5867 unsigned iid;
5869 if (!invs)
5870 return;
5872 gcc_assert (n_inv_uses != NULL);
5873 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5875 n_inv_uses[iid]++;
5876 if (n_inv_uses[iid] == 1)
5877 ivs->n_invs++;
5881 /* Set cost pair for GROUP in set IVS to CP. */
5883 static void
5884 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5885 struct iv_group *group, struct cost_pair *cp)
5887 unsigned gid = group->id, cid;
5889 if (ivs->cand_for_group[gid] == cp)
5890 return;
5892 if (ivs->cand_for_group[gid])
5893 iv_ca_set_no_cp (data, ivs, group);
5895 if (cp)
5897 cid = cp->cand->id;
5899 ivs->bad_groups--;
5900 ivs->cand_for_group[gid] = cp;
5901 ivs->n_cand_uses[cid]++;
5902 if (ivs->n_cand_uses[cid] == 1)
5904 bitmap_set_bit (ivs->cands, cid);
5905 ivs->n_cands++;
5906 ivs->cand_cost += cp->cand->cost;
5907 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5908 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5911 ivs->cand_use_cost += cp->cost;
5912 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5913 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5914 iv_ca_recount_cost (data, ivs);
5918 /* Extend set IVS by expressing USE by some of the candidates in it
5919 if possible. Consider all important candidates if candidates in
5920 set IVS don't give any result. */
5922 static void
5923 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5924 struct iv_group *group)
5926 struct cost_pair *best_cp = NULL, *cp;
5927 bitmap_iterator bi;
5928 unsigned i;
5929 struct iv_cand *cand;
5931 gcc_assert (ivs->upto >= group->id);
5932 ivs->upto++;
5933 ivs->bad_groups++;
5935 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5937 cand = data->vcands[i];
5938 cp = get_group_iv_cost (data, group, cand);
5939 if (cheaper_cost_pair (cp, best_cp))
5940 best_cp = cp;
5943 if (best_cp == NULL)
5945 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5947 cand = data->vcands[i];
5948 cp = get_group_iv_cost (data, group, cand);
5949 if (cheaper_cost_pair (cp, best_cp))
5950 best_cp = cp;
5954 iv_ca_set_cp (data, ivs, group, best_cp);
5957 /* Get cost for assignment IVS. */
5959 static comp_cost
5960 iv_ca_cost (struct iv_ca *ivs)
5962 /* This was a conditional expression but it triggered a bug in
5963 Sun C 5.5. */
5964 if (ivs->bad_groups)
5965 return infinite_cost;
5966 else
5967 return ivs->cost;
5970 /* Compare whether applying NEW_CP to GROUP in IVS introduces more invariants
5971 than applying OLD_CP does. Return 1, 0 or -1 for more, equal or fewer
5972 invariants respectively. */
5974 static int
5975 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5976 struct iv_group *group, struct cost_pair *old_cp,
5977 struct cost_pair *new_cp)
5979 gcc_assert (old_cp && new_cp && old_cp != new_cp);
5980 unsigned old_n_invs = ivs->n_invs;
5981 iv_ca_set_cp (data, ivs, group, new_cp);
5982 unsigned new_n_invs = ivs->n_invs;
5983 iv_ca_set_cp (data, ivs, group, old_cp);
5985 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5988 /* Creates a change of expressing GROUP by NEW_CP instead of OLD_CP and chains
5989 it before NEXT. */
5991 static struct iv_ca_delta *
5992 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5993 struct cost_pair *new_cp, struct iv_ca_delta *next)
5995 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5997 change->group = group;
5998 change->old_cp = old_cp;
5999 change->new_cp = new_cp;
6000 change->next = next;
6002 return change;
6005 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6006 are rewritten. */
6008 static struct iv_ca_delta *
6009 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6011 struct iv_ca_delta *last;
6013 if (!l2)
6014 return l1;
6016 if (!l1)
6017 return l2;
6019 for (last = l1; last->next; last = last->next)
6020 continue;
6021 last->next = l2;
6023 return l1;
6026 /* Reverse the list of changes DELTA, forming the inverse to it. */
6028 static struct iv_ca_delta *
6029 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6031 struct iv_ca_delta *act, *next, *prev = NULL;
6033 for (act = delta; act; act = next)
6035 next = act->next;
6036 act->next = prev;
6037 prev = act;
6039 std::swap (act->old_cp, act->new_cp);
6042 return prev;
6045 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6046 reverted instead. */
6048 static void
6049 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6050 struct iv_ca_delta *delta, bool forward)
6052 struct cost_pair *from, *to;
6053 struct iv_ca_delta *act;
6055 if (!forward)
6056 delta = iv_ca_delta_reverse (delta);
6058 for (act = delta; act; act = act->next)
6060 from = act->old_cp;
6061 to = act->new_cp;
6062 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6063 iv_ca_set_cp (data, ivs, act->group, to);
6066 if (!forward)
6067 iv_ca_delta_reverse (delta);
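/* A self-contained sketch (illustrative names, not GCC API) of the delta
   pattern implemented above: changes are collected in a singly linked list,
   committing walks the list applying the NEW values, and reverting reverses
   the list in place while swapping OLD/NEW so that the very same walk undoes
   the changes:

     struct change { int slot, old_v, new_v; struct change *next; };

     static void
     apply (int *state, struct change *c)
     {
       for (; c; c = c->next)
         state[c->slot] = c->new_v;
     }

     static struct change *
     reverse_and_swap (struct change *c)
     {
       struct change *prev = NULL, *next;
       for (; c; c = next)
         {
           next = c->next;
           c->next = prev;
           prev = c;
           int tmp = c->old_v; c->old_v = c->new_v; c->new_v = tmp;
         }
       return prev;
     }

   iv_ca_delta_commit with FORWARD false is apply () run over
   reverse_and_swap (delta), followed by a second reversal so that a later
   forward commit sees the original list again.  */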
6070 /* Returns true if CAND is used in IVS. */
6072 static bool
6073 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6075 return ivs->n_cand_uses[cand->id] > 0;
6078 /* Returns number of induction variable candidates in the set IVS. */
6080 static unsigned
6081 iv_ca_n_cands (struct iv_ca *ivs)
6083 return ivs->n_cands;
6086 /* Free the list of changes DELTA. */
6088 static void
6089 iv_ca_delta_free (struct iv_ca_delta **delta)
6091 struct iv_ca_delta *act, *next;
6093 for (act = *delta; act; act = next)
6095 next = act->next;
6096 free (act);
6099 *delta = NULL;
6102 /* Allocates a new iv candidate assignment. */
6104 static struct iv_ca *
6105 iv_ca_new (struct ivopts_data *data)
6107 struct iv_ca *nw = XNEW (struct iv_ca);
6109 nw->upto = 0;
6110 nw->bad_groups = 0;
6111 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6112 data->vgroups.length ());
6113 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6114 nw->cands = BITMAP_ALLOC (NULL);
6115 nw->n_cands = 0;
6116 nw->n_invs = 0;
6117 nw->cand_use_cost = no_cost;
6118 nw->cand_cost = 0;
6119 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6120 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6121 nw->cost = no_cost;
6123 return nw;
6126 /* Free memory occupied by the set IVS. */
6128 static void
6129 iv_ca_free (struct iv_ca **ivs)
6131 free ((*ivs)->cand_for_group);
6132 free ((*ivs)->n_cand_uses);
6133 BITMAP_FREE ((*ivs)->cands);
6134 free ((*ivs)->n_inv_var_uses);
6135 free ((*ivs)->n_inv_expr_uses);
6136 free (*ivs);
6137 *ivs = NULL;
6140 /* Dumps IVS to FILE. */
6142 static void
6143 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6145 unsigned i;
6146 comp_cost cost = iv_ca_cost (ivs);
6148 fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6149 cost.complexity);
6150 fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
6151 "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6152 ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6153 bitmap_print (file, ivs->cands, " candidates: ","\n");
6155 for (i = 0; i < ivs->upto; i++)
6157 struct iv_group *group = data->vgroups[i];
6158 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6159 if (cp)
6160 fprintf (file, " group:%d --> iv_cand:%d, cost=("
6161 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6162 cp->cost.cost, cp->cost.complexity);
6163 else
6164 fprintf (file, " group:%d --> ??\n", group->id);
6167 const char *pref = "";
6168 fprintf (file, " invariant variables: ");
6169 for (i = 1; i <= data->max_inv_var_id; i++)
6170 if (ivs->n_inv_var_uses[i])
6172 fprintf (file, "%s%d", pref, i);
6173 pref = ", ";
6176 pref = "";
6177 fprintf (file, "\n invariant expressions: ");
6178 for (i = 1; i <= data->max_inv_expr_id; i++)
6179 if (ivs->n_inv_expr_uses[i])
6181 fprintf (file, "%s%d", pref, i);
6182 pref = ", ";
6185 fprintf (file, "\n\n");
6188 /* Try changing the candidate in IVS to CAND for each use. Return the cost
6189 of the new set, and store the differences in DELTA. The number of induction
6190 variables in the new set is stored in N_IVS. MIN_NCAND is a flag. When it is
6191 true the function will try to find a solution with minimal iv candidates. */
6193 static comp_cost
6194 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6195 struct iv_cand *cand, struct iv_ca_delta **delta,
6196 unsigned *n_ivs, bool min_ncand)
6198 unsigned i;
6199 comp_cost cost;
6200 struct iv_group *group;
6201 struct cost_pair *old_cp, *new_cp;
6203 *delta = NULL;
6204 for (i = 0; i < ivs->upto; i++)
6206 group = data->vgroups[i];
6207 old_cp = iv_ca_cand_for_group (ivs, group);
6209 if (old_cp
6210 && old_cp->cand == cand)
6211 continue;
6213 new_cp = get_group_iv_cost (data, group, cand);
6214 if (!new_cp)
6215 continue;
6217 if (!min_ncand)
6219 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6220 /* Skip if new_cp depends on more invariants. */
6221 if (cmp_invs > 0)
6222 continue;
6224 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6225 /* Skip if new_cp is not cheaper. */
6226 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6227 continue;
6230 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6233 iv_ca_delta_commit (data, ivs, *delta, true);
6234 cost = iv_ca_cost (ivs);
6235 if (n_ivs)
6236 *n_ivs = iv_ca_n_cands (ivs);
6237 iv_ca_delta_commit (data, ivs, *delta, false);
6239 return cost;
6242 /* Try narrowing set IVS by removing CAND. Return the cost of
6243 the new set and store the differences in DELTA. START is
6244 the candidate with which we start narrowing. */
6246 static comp_cost
6247 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6248 struct iv_cand *cand, struct iv_cand *start,
6249 struct iv_ca_delta **delta)
6251 unsigned i, ci;
6252 struct iv_group *group;
6253 struct cost_pair *old_cp, *new_cp, *cp;
6254 bitmap_iterator bi;
6255 struct iv_cand *cnd;
6256 comp_cost cost, best_cost, acost;
6258 *delta = NULL;
6259 for (i = 0; i < data->vgroups.length (); i++)
6261 group = data->vgroups[i];
6263 old_cp = iv_ca_cand_for_group (ivs, group);
6264 if (old_cp->cand != cand)
6265 continue;
6267 best_cost = iv_ca_cost (ivs);
6268 /* Start narrowing with START. */
6269 new_cp = get_group_iv_cost (data, group, start);
6271 if (data->consider_all_candidates)
6273 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6275 if (ci == cand->id || (start && ci == start->id))
6276 continue;
6278 cnd = data->vcands[ci];
6280 cp = get_group_iv_cost (data, group, cnd);
6281 if (!cp)
6282 continue;
6284 iv_ca_set_cp (data, ivs, group, cp);
6285 acost = iv_ca_cost (ivs);
6287 if (acost < best_cost)
6289 best_cost = acost;
6290 new_cp = cp;
6294 else
6296 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6298 if (ci == cand->id || (start && ci == start->id))
6299 continue;
6301 cnd = data->vcands[ci];
6303 cp = get_group_iv_cost (data, group, cnd);
6304 if (!cp)
6305 continue;
6307 iv_ca_set_cp (data, ivs, group, cp);
6308 acost = iv_ca_cost (ivs);
6310 if (acost < best_cost)
6312 best_cost = acost;
6313 new_cp = cp;
6317 /* Restore the old cp for this group. */
6318 iv_ca_set_cp (data, ivs, group, old_cp);
6320 if (!new_cp)
6322 iv_ca_delta_free (delta);
6323 return infinite_cost;
6326 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6329 iv_ca_delta_commit (data, ivs, *delta, true);
6330 cost = iv_ca_cost (ivs);
6331 iv_ca_delta_commit (data, ivs, *delta, false);
6333 return cost;
6336 /* Try optimizing the set of candidates IVS by removing candidates other
6337 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6338 differences in DELTA. */
6340 static comp_cost
6341 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6342 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6344 bitmap_iterator bi;
6345 struct iv_ca_delta *act_delta, *best_delta;
6346 unsigned i;
6347 comp_cost best_cost, acost;
6348 struct iv_cand *cand;
6350 best_delta = NULL;
6351 best_cost = iv_ca_cost (ivs);
6353 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6355 cand = data->vcands[i];
6357 if (cand == except_cand)
6358 continue;
6360 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6362 if (acost < best_cost)
6364 best_cost = acost;
6365 iv_ca_delta_free (&best_delta);
6366 best_delta = act_delta;
6368 else
6369 iv_ca_delta_free (&act_delta);
6372 if (!best_delta)
6374 *delta = NULL;
6375 return best_cost;
6378 /* Recurse to possibly remove other unnecessary ivs. */
6379 iv_ca_delta_commit (data, ivs, best_delta, true);
6380 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6381 iv_ca_delta_commit (data, ivs, best_delta, false);
6382 *delta = iv_ca_delta_join (best_delta, *delta);
6383 return best_cost;
6386 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6387 cheaper local cost for GROUP than BEST_CP. If so, return a pointer to
6388 the corresponding cost_pair, otherwise just return BEST_CP. */
6390 static struct cost_pair*
6391 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6392 unsigned int cand_idx, struct iv_cand *old_cand,
6393 struct cost_pair *best_cp)
6395 struct iv_cand *cand;
6396 struct cost_pair *cp;
6398 gcc_assert (old_cand != NULL && best_cp != NULL);
6399 if (cand_idx == old_cand->id)
6400 return best_cp;
6402 cand = data->vcands[cand_idx];
6403 cp = get_group_iv_cost (data, group, cand);
6404 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6405 return cp;
6407 return best_cp;
6410 /* Try breaking the locally optimal fixed point for IVS by replacing
6411 candidates that are used by more than one iv use. For each such candidate,
6412 this function tries to represent the iv uses under that candidate using
6413 other candidates with lower local cost, then tries to prune the new set.
6414 If the new set has lower cost, it returns the new cost after recording the
6415 candidate replacements in list DELTA. */
6417 static comp_cost
6418 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6419 struct iv_ca_delta **delta)
6421 bitmap_iterator bi, bj;
6422 unsigned int i, j, k;
6423 struct iv_cand *cand;
6424 comp_cost orig_cost, acost;
6425 struct iv_ca_delta *act_delta, *tmp_delta;
6426 struct cost_pair *old_cp, *best_cp = NULL;
6428 *delta = NULL;
6429 orig_cost = iv_ca_cost (ivs);
6431 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6433 if (ivs->n_cand_uses[i] == 1
6434 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6435 continue;
6437 cand = data->vcands[i];
6439 act_delta = NULL;
6440 /* Represent uses under current candidate using other ones with
6441 lower local cost. */
6442 for (j = 0; j < ivs->upto; j++)
6444 struct iv_group *group = data->vgroups[j];
6445 old_cp = iv_ca_cand_for_group (ivs, group);
6447 if (old_cp->cand != cand)
6448 continue;
6450 best_cp = old_cp;
6451 if (data->consider_all_candidates)
6452 for (k = 0; k < data->vcands.length (); k++)
6453 best_cp = cheaper_cost_with_cand (data, group, k,
6454 old_cp->cand, best_cp);
6455 else
6456 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6457 best_cp = cheaper_cost_with_cand (data, group, k,
6458 old_cp->cand, best_cp);
6460 if (best_cp == old_cp)
6461 continue;
6463 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6465 /* No need for further pruning. */
6466 if (!act_delta)
6467 continue;
6469 /* Prune the new candidate set. */
6470 iv_ca_delta_commit (data, ivs, act_delta, true);
6471 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6472 iv_ca_delta_commit (data, ivs, act_delta, false);
6473 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6475 if (acost < orig_cost)
6477 *delta = act_delta;
6478 return acost;
6480 else
6481 iv_ca_delta_free (&act_delta);
6484 return orig_cost;
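/* A made-up cost example of the local optimum that iv_ca_replace breaks: let
   groups g1 and g2 both currently use candidate C, with group costs
   cost(g1,C) = cost(g2,C) = 5, cost(g1,A) = cost(g2,B) = 2, and a
   maintenance cost of 4 per candidate in the set.

     current {C}:          5 + 5 + 4         = 14
     move only g1 to A:    2 + 5 + 4 + 4     = 15   (worse, C still needed)
     replace C by {A, B}:  2 + 2 + 4 + 4     = 12   (better)

   No single extend/prune move improves on 14, so the greedy search is stuck;
   replacing all uses of C at once and then pruning C reaches 12.  */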
6487 /* Tries to extend the set IVS in the best possible way in order to
6488 express GROUP. If ORIGINALP is true, prefer candidates from
6489 the original set of IVs, otherwise favor important candidates not
6490 based on any memory object. */
6492 static bool
6493 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6494 struct iv_group *group, bool originalp)
6496 comp_cost best_cost, act_cost;
6497 unsigned i;
6498 bitmap_iterator bi;
6499 struct iv_cand *cand;
6500 struct iv_ca_delta *best_delta = NULL, *act_delta;
6501 struct cost_pair *cp;
6503 iv_ca_add_group (data, ivs, group);
6504 best_cost = iv_ca_cost (ivs);
6505 cp = iv_ca_cand_for_group (ivs, group);
6506 if (cp)
6508 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6509 iv_ca_set_no_cp (data, ivs, group);
6512 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6513 first try important candidates not based on any memory object. Only if
6514 this fails, try the specific ones. Rationale -- in loops with many
6515 variables the best choice often is to use just one generic biv. If we
6516 added here many ivs specific to the uses, the optimization algorithm later
6517 would be likely to get stuck in a local minimum, thus causing us to create
6518 too many ivs. The approach from few ivs to more seems more likely to be
6519 successful -- starting from few ivs, replacing an expensive use by a
6520 specific iv should always be a win. */
6521 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6523 cand = data->vcands[i];
6525 if (originalp && cand->pos != IP_ORIGINAL)
6526 continue;
6528 if (!originalp && cand->iv->base_object != NULL_TREE)
6529 continue;
6531 if (iv_ca_cand_used_p (ivs, cand))
6532 continue;
6534 cp = get_group_iv_cost (data, group, cand);
6535 if (!cp)
6536 continue;
6538 iv_ca_set_cp (data, ivs, group, cp);
6539 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6540 true);
6541 iv_ca_set_no_cp (data, ivs, group);
6542 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6544 if (act_cost < best_cost)
6546 best_cost = act_cost;
6548 iv_ca_delta_free (&best_delta);
6549 best_delta = act_delta;
6551 else
6552 iv_ca_delta_free (&act_delta);
6555 if (best_cost.infinite_cost_p ())
6557 for (i = 0; i < group->n_map_members; i++)
6559 cp = group->cost_map + i;
6560 cand = cp->cand;
6561 if (!cand)
6562 continue;
6564 /* Already tried this. */
6565 if (cand->important)
6567 if (originalp && cand->pos == IP_ORIGINAL)
6568 continue;
6569 if (!originalp && cand->iv->base_object == NULL_TREE)
6570 continue;
6573 if (iv_ca_cand_used_p (ivs, cand))
6574 continue;
6576 act_delta = NULL;
6577 iv_ca_set_cp (data, ivs, group, cp);
6578 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6579 iv_ca_set_no_cp (data, ivs, group);
6580 act_delta = iv_ca_delta_add (group,
6581 iv_ca_cand_for_group (ivs, group),
6582 cp, act_delta);
6584 if (act_cost < best_cost)
6586 best_cost = act_cost;
6588 if (best_delta)
6589 iv_ca_delta_free (&best_delta);
6590 best_delta = act_delta;
6592 else
6593 iv_ca_delta_free (&act_delta);
6597 iv_ca_delta_commit (data, ivs, best_delta, true);
6598 iv_ca_delta_free (&best_delta);
6600 return !best_cost.infinite_cost_p ();
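/* To illustrate the rationale above with a hypothetical source loop:

     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   there are three address-type groups (&a[i], &b[i], &c[i]) and one compare
   group (i < n).  Starting from important candidates, all four groups can
   usually be expressed from a single counter iv, each address adding its own
   loop-invariant base.  Seeding the search with one specific pointer iv per
   array would instead start from three ivs, and the greedy improvement step
   would have a hard time merging them back into one.  */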
6603 /* Finds an initial assignment of candidates to uses. */
6605 static struct iv_ca *
6606 get_initial_solution (struct ivopts_data *data, bool originalp)
6608 unsigned i;
6609 struct iv_ca *ivs = iv_ca_new (data);
6611 for (i = 0; i < data->vgroups.length (); i++)
6612 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6614 iv_ca_free (&ivs);
6615 return NULL;
6618 return ivs;
6621 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6622 points to a bool variable; if it is true, this function tries to break
6623 the locally optimal fixed point by replacing candidates in IVS. */
6625 static bool
6626 try_improve_iv_set (struct ivopts_data *data,
6627 struct iv_ca *ivs, bool *try_replace_p)
6629 unsigned i, n_ivs;
6630 comp_cost acost, best_cost = iv_ca_cost (ivs);
6631 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6632 struct iv_cand *cand;
6634 /* Try extending the set of induction variables by one. */
6635 for (i = 0; i < data->vcands.length (); i++)
6637 cand = data->vcands[i];
6639 if (iv_ca_cand_used_p (ivs, cand))
6640 continue;
6642 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6643 if (!act_delta)
6644 continue;
6646 /* If we successfully added the candidate and the set is small enough,
6647 try optimizing it by removing other candidates. */
6648 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6650 iv_ca_delta_commit (data, ivs, act_delta, true);
6651 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6652 iv_ca_delta_commit (data, ivs, act_delta, false);
6653 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6656 if (acost < best_cost)
6658 best_cost = acost;
6659 iv_ca_delta_free (&best_delta);
6660 best_delta = act_delta;
6662 else
6663 iv_ca_delta_free (&act_delta);
6666 if (!best_delta)
6668 /* Try removing the candidates from the set instead. */
6669 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6671 if (!best_delta && *try_replace_p)
6673 *try_replace_p = false;
6674 /* So far the candidate selection algorithm tends to choose fewer IVs
6675 so that it can handle cases in which loops have many variables
6676 but the best choice is often to use only one general biv. One
6677 weakness is that it can't handle the opposite case, in which different
6678 candidates should be chosen with respect to each use. To solve
6679 the problem, we replace candidates in the manner described in the
6680 comments of iv_ca_replace, thus giving the general algorithm a chance
6681 to break the locally optimal fixed point in these cases. */
6682 best_cost = iv_ca_replace (data, ivs, &best_delta);
6685 if (!best_delta)
6686 return false;
6689 iv_ca_delta_commit (data, ivs, best_delta, true);
6690 iv_ca_delta_free (&best_delta);
6691 return best_cost == iv_ca_cost (ivs);
6694 /* Attempts to find the optimal set of induction variables. We use a simple
6695 greedy heuristic -- we try to replace at most one candidate in the selected
6696 solution and remove the unused ivs while this improves the cost. */
6698 static struct iv_ca *
6699 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6701 struct iv_ca *set;
6702 bool try_replace_p = true;
6704 /* Get the initial solution. */
6705 set = get_initial_solution (data, originalp);
6706 if (!set)
6708 if (dump_file && (dump_flags & TDF_DETAILS))
6709 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6710 return NULL;
6713 if (dump_file && (dump_flags & TDF_DETAILS))
6715 fprintf (dump_file, "Initial set of candidates:\n");
6716 iv_ca_dump (data, dump_file, set);
6719 while (try_improve_iv_set (data, set, &try_replace_p))
6721 if (dump_file && (dump_flags & TDF_DETAILS))
6723 fprintf (dump_file, "Improved to:\n");
6724 iv_ca_dump (data, dump_file, set);
6728 /* If the set has infinite_cost, it can't be optimal. */
6729 if (iv_ca_cost (set).infinite_cost_p ())
6731 if (dump_file && (dump_flags & TDF_DETAILS))
6732 fprintf (dump_file,
6733 "Overflow to infinite cost in try_improve_iv_set.\n");
6734 iv_ca_free (&set);
6736 return set;
6739 static struct iv_ca *
6740 find_optimal_iv_set (struct ivopts_data *data)
6742 unsigned i;
6743 comp_cost cost, origcost;
6744 struct iv_ca *set, *origset;
6746 /* Determine the cost based on a strategy that starts with the original IVs,
6747 and then try again using a strategy that prefers candidates not based
6748 on any memory object. */
6749 origset = find_optimal_iv_set_1 (data, true);
6750 set = find_optimal_iv_set_1 (data, false);
6752 if (!origset && !set)
6753 return NULL;
6755 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6756 cost = set ? iv_ca_cost (set) : infinite_cost;
6758 if (dump_file && (dump_flags & TDF_DETAILS))
6760 fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
6761 origcost.cost, origcost.complexity);
6762 fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
6763 cost.cost, cost.complexity);
6766 /* Choose the one with the best cost. */
6767 if (origcost <= cost)
6769 if (set)
6770 iv_ca_free (&set);
6771 set = origset;
6773 else if (origset)
6774 iv_ca_free (&origset);
6776 for (i = 0; i < data->vgroups.length (); i++)
6778 struct iv_group *group = data->vgroups[i];
6779 group->selected = iv_ca_cand_for_group (set, group)->cand;
6782 return set;
6785 /* Creates a new induction variable corresponding to CAND. */
6787 static void
6788 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6790 gimple_stmt_iterator incr_pos;
6791 tree base;
6792 struct iv_use *use;
6793 struct iv_group *group;
6794 bool after = false;
6796 gcc_assert (cand->iv != NULL);
6798 switch (cand->pos)
6800 case IP_NORMAL:
6801 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6802 break;
6804 case IP_END:
6805 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6806 after = true;
6807 break;
6809 case IP_AFTER_USE:
6810 after = true;
6811 /* fall through */
6812 case IP_BEFORE_USE:
6813 incr_pos = gsi_for_stmt (cand->incremented_at);
6814 break;
6816 case IP_ORIGINAL:
6817 /* Mark that the iv is preserved. */
6818 name_info (data, cand->var_before)->preserve_biv = true;
6819 name_info (data, cand->var_after)->preserve_biv = true;
6821 /* Rewrite the increment so that it uses var_before directly. */
6822 use = find_interesting_uses_op (data, cand->var_after);
6823 group = data->vgroups[use->group_id];
6824 group->selected = cand;
6825 return;
6828 gimple_add_tmp_var (cand->var_before);
6830 base = unshare_expr (cand->iv->base);
6832 create_iv (base, unshare_expr (cand->iv->step),
6833 cand->var_before, data->current_loop,
6834 &incr_pos, after, &cand->var_before, &cand->var_after);
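/* Roughly speaking (a sketch; the GIMPLE details and the ivtmp naming are
   illustrative), for a candidate with base B and step S at position
   IP_NORMAL, create_iv materializes

     # ivtmp_1 = PHI <B (preheader), ivtmp_2 (latch)>
     ...
     ivtmp_2 = ivtmp_1 + S;

   binding cand->var_before to the PHI result and cand->var_after to the
   incremented name, so the rewriting done later can use whichever value is
   live at a given use.  */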
6837 /* Creates new induction variables described in SET. */
6839 static void
6840 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6842 unsigned i;
6843 struct iv_cand *cand;
6844 bitmap_iterator bi;
6846 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6848 cand = data->vcands[i];
6849 create_new_iv (data, cand);
6852 if (dump_file && (dump_flags & TDF_DETAILS))
6854 fprintf (dump_file, "Selected IV set for loop %d",
6855 data->current_loop->num);
6856 if (data->loop_loc != UNKNOWN_LOCATION)
6857 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6858 LOCATION_LINE (data->loop_loc));
6859 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6860 avg_loop_niter (data->current_loop));
6861 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6862 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6864 cand = data->vcands[i];
6865 dump_cand (dump_file, cand);
6867 fprintf (dump_file, "\n");
6871 /* Rewrites USE (definition of iv used in a nonlinear expression)
6872 using candidate CAND. */
6874 static void
6875 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6876 struct iv_use *use, struct iv_cand *cand)
6878 gassign *ass;
6879 gimple_stmt_iterator bsi;
6880 tree comp, type = get_use_type (use), tgt;
6882 /* An important special case -- if we are asked to express the value of
6883 the original iv by itself, just exit; there is no need to
6884 introduce a new computation (that might also need casting the
6885 variable to unsigned and back). */
6886 if (cand->pos == IP_ORIGINAL
6887 && cand->incremented_at == use->stmt)
6889 tree op = NULL_TREE;
6890 enum tree_code stmt_code;
6892 gcc_assert (is_gimple_assign (use->stmt));
6893 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6895 /* Check whether we may leave the computation unchanged.
6896 This is the case only if it does not rely on other
6897 computations in the loop -- otherwise, the computation
6898 we rely upon may be removed in remove_unused_ivs,
6899 thus leading to ICE. */
6900 stmt_code = gimple_assign_rhs_code (use->stmt);
6901 if (stmt_code == PLUS_EXPR
6902 || stmt_code == MINUS_EXPR
6903 || stmt_code == POINTER_PLUS_EXPR)
6905 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6906 op = gimple_assign_rhs2 (use->stmt);
6907 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6908 op = gimple_assign_rhs1 (use->stmt);
6911 if (op != NULL_TREE)
6913 if (expr_invariant_in_loop_p (data->current_loop, op))
6914 return;
6915 if (TREE_CODE (op) == SSA_NAME)
6917 struct iv *iv = get_iv (data, op);
6918 if (iv != NULL && integer_zerop (iv->step))
6919 return;
6924 switch (gimple_code (use->stmt))
6926 case GIMPLE_PHI:
6927 tgt = PHI_RESULT (use->stmt);
6929 /* If we should keep the biv, do not replace it. */
6930 if (name_info (data, tgt)->preserve_biv)
6931 return;
6933 bsi = gsi_after_labels (gimple_bb (use->stmt));
6934 break;
6936 case GIMPLE_ASSIGN:
6937 tgt = gimple_assign_lhs (use->stmt);
6938 bsi = gsi_for_stmt (use->stmt);
6939 break;
6941 default:
6942 gcc_unreachable ();
6945 aff_tree aff_inv, aff_var;
6946 if (!get_computation_aff_1 (data->current_loop, use->stmt,
6947 use, cand, &aff_inv, &aff_var))
6948 gcc_unreachable ();
6950 unshare_aff_combination (&aff_inv);
6951 unshare_aff_combination (&aff_var);
6952 /* Prefer the CSE opportunity over the loop invariant by adding the offset
6953 last, so that iv_uses with different offsets can be CSEed. */
6954 poly_widest_int offset = aff_inv.offset;
6955 aff_inv.offset = 0;
6957 gimple_seq stmt_list = NULL, seq = NULL;
6958 tree comp_op1 = aff_combination_to_tree (&aff_inv);
6959 tree comp_op2 = aff_combination_to_tree (&aff_var);
6960 gcc_assert (comp_op1 && comp_op2);
6962 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6963 gimple_seq_add_seq (&stmt_list, seq);
6964 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6965 gimple_seq_add_seq (&stmt_list, seq);
6967 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6968 std::swap (comp_op1, comp_op2);
6970 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6972 comp = fold_build_pointer_plus (comp_op1,
6973 fold_convert (sizetype, comp_op2));
6974 comp = fold_build_pointer_plus (comp,
6975 wide_int_to_tree (sizetype, offset));
6977 else
6979 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6980 fold_convert (TREE_TYPE (comp_op1), comp_op2));
6981 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6982 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6985 comp = fold_convert (type, comp);
6986 if (!valid_gimple_rhs_p (comp)
6987 || (gimple_code (use->stmt) != GIMPLE_PHI
6988 /* We can't allow re-allocating the stmt as it might be pointed
6989 to still. */
6990 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6991 >= gimple_num_ops (gsi_stmt (bsi)))))
6993 comp = force_gimple_operand (comp, &seq, true, NULL);
6994 gimple_seq_add_seq (&stmt_list, seq);
6995 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6997 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6998 /* As this isn't a plain copy we have to reset alignment
6999 information. */
7000 if (SSA_NAME_PTR_INFO (comp))
7001 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7005 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7006 if (gimple_code (use->stmt) == GIMPLE_PHI)
7008 ass = gimple_build_assign (tgt, comp);
7009 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7011 bsi = gsi_for_stmt (use->stmt);
7012 remove_phi_node (&bsi, false);
7014 else
7016 gimple_assign_set_rhs_from_tree (&bsi, comp);
7017 use->stmt = gsi_stmt (bsi);
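/* An illustrative (source-level, not GIMPLE) consequence of adding the
   offset last above: two uses derived from the same candidate,

     x = *(int *)(base + i * 4 + 16);
     y = *(int *)(base + i * 4 + 32);

   become

     t = base + i * 4;      // shared, CSE-able subexpression
     x = *(int *)(t + 16);
     y = *(int *)(t + 32);

   whereas folding each constant offset into the invariant part first would
   produce two distinct invariants (base + 16 and base + 32) and nothing for
   CSE to share.  */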
7021 /* Performs a peephole optimization to reorder the iv update statement with
7022 a mem ref to enable instruction combining in later phases. The mem ref uses
7023 the iv value before the update, so the reordering transformation requires
7024 adjustment of the offset. CAND is the selected IV_CAND.
7026 Example:
7028 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7029 iv2 = iv1 + 1;
7031 if (t < val) (1)
7032 goto L;
7033 goto Head;
7036 Directly propagating t over to (1) will introduce an overlapping live range
7037 and thus increase register pressure. This peephole transforms it into:
7040 iv2 = iv1 + 1;
7041 t = MEM_REF (base, iv2, 8, 8);
7042 if (t < val)
7043 goto L;
7044 goto Head;
7047 static void
7048 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7050 tree var_after;
7051 gimple *iv_update, *stmt;
7052 basic_block bb;
7053 gimple_stmt_iterator gsi, gsi_iv;
7055 if (cand->pos != IP_NORMAL)
7056 return;
7058 var_after = cand->var_after;
7059 iv_update = SSA_NAME_DEF_STMT (var_after);
7061 bb = gimple_bb (iv_update);
7062 gsi = gsi_last_nondebug_bb (bb);
7063 stmt = gsi_stmt (gsi);
7065 /* Only handle conditional statement for now. */
7066 if (gimple_code (stmt) != GIMPLE_COND)
7067 return;
7069 gsi_prev_nondebug (&gsi);
7070 stmt = gsi_stmt (gsi);
7071 if (stmt != iv_update)
7072 return;
7074 gsi_prev_nondebug (&gsi);
7075 if (gsi_end_p (gsi))
7076 return;
7078 stmt = gsi_stmt (gsi);
7079 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7080 return;
7082 if (stmt != use->stmt)
7083 return;
7085 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7086 return;
7088 if (dump_file && (dump_flags & TDF_DETAILS))
7090 fprintf (dump_file, "Reordering \n");
7091 print_gimple_stmt (dump_file, iv_update, 0);
7092 print_gimple_stmt (dump_file, use->stmt, 0);
7093 fprintf (dump_file, "\n");
7096 gsi = gsi_for_stmt (use->stmt);
7097 gsi_iv = gsi_for_stmt (iv_update);
7098 gsi_move_before (&gsi_iv, &gsi);
7100 cand->pos = IP_BEFORE_USE;
7101 cand->incremented_at = use->stmt;
7104 /* Return the alias pointer type that should be used for a MEM_REF
7105 associated with USE, which has type USE_PTR_ADDRESS. */
7107 static tree
7108 get_alias_ptr_type_for_ptr_address (iv_use *use)
7110 gcall *call = as_a <gcall *> (use->stmt);
7111 switch (gimple_call_internal_fn (call))
7113 case IFN_MASK_LOAD:
7114 case IFN_MASK_STORE:
7115 /* The second argument contains the correct alias type. */
7116 gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7117 return TREE_TYPE (gimple_call_arg (call, 1));
7119 default:
7120 gcc_unreachable ();
7125 /* Rewrites USE (address that is an iv) using candidate CAND. */
7127 static void
7128 rewrite_use_address (struct ivopts_data *data,
7129 struct iv_use *use, struct iv_cand *cand)
7131 aff_tree aff;
7132 bool ok;
7134 adjust_iv_update_pos (cand, use);
7135 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7136 gcc_assert (ok);
7137 unshare_aff_combination (&aff);
7139 /* To avoid undefined overflow problems, all IV candidates use unsigned
7140 integer types. The drawback is that this makes it impossible for
7141 create_mem_ref to distinguish an IV that is based on a memory object
7142 from one that represents simply an offset.
7144 To work around this problem, we pass a hint to create_mem_ref that
7145 indicates which variable (if any) in aff is an IV based on a memory
7146 object. Note that we only consider the candidate. If this is not
7147 based on an object, the base of the reference is in some subexpression
7148 of the use -- but these will use pointer types, so they are recognized
7149 by the create_mem_ref heuristics anyway. */
7150 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7151 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7152 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7153 tree type = use->mem_type;
7154 tree alias_ptr_type;
7155 if (use->type == USE_PTR_ADDRESS)
7156 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7157 else
7159 gcc_assert (type == TREE_TYPE (*use->op_p));
7160 unsigned int align = get_object_alignment (*use->op_p);
7161 if (align != TYPE_ALIGN (type))
7162 type = build_aligned_type (type, align);
7163 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7165 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7166 iv, base_hint, data->speed);
7168 if (use->type == USE_PTR_ADDRESS)
7170 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7171 ref = fold_convert (get_use_type (use), ref);
7172 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7173 true, GSI_SAME_STMT);
7175 else
7176 copy_ref_info (ref, *use->op_p);
7178 *use->op_p = ref;
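/* Illustrative outcome of the rewrite above: an access a[i] whose selected
   candidate is a pointer iv p stepping by 4 typically ends up as a single
   memory reference such as

     MEM[(int *)p_5]

   or, on a target with indexed addressing modes, a TARGET_MEM_REF combining
   base, index and step directly.  create_mem_ref folds as much of the affine
   expression AFF into the reference as the addressing modes allow and emits
   the remainder as separate statements before the use.  */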
7181 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7182 candidate CAND. */
7184 static void
7185 rewrite_use_compare (struct ivopts_data *data,
7186 struct iv_use *use, struct iv_cand *cand)
7188 tree comp, op, bound;
7189 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7190 enum tree_code compare;
7191 struct iv_group *group = data->vgroups[use->group_id];
7192 struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7194 bound = cp->value;
7195 if (bound)
7197 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7198 tree var_type = TREE_TYPE (var);
7199 gimple_seq stmts;
7201 if (dump_file && (dump_flags & TDF_DETAILS))
7203 fprintf (dump_file, "Replacing exit test: ");
7204 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7206 compare = cp->comp;
7207 bound = unshare_expr (fold_convert (var_type, bound));
7208 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7209 if (stmts)
7210 gsi_insert_seq_on_edge_immediate (
7211 loop_preheader_edge (data->current_loop),
7212 stmts);
7214 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7215 gimple_cond_set_lhs (cond_stmt, var);
7216 gimple_cond_set_code (cond_stmt, compare);
7217 gimple_cond_set_rhs (cond_stmt, op);
7218 return;
7221 /* The induction variable elimination failed; just express the original
7222 giv. */
7223 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7224 gcc_assert (comp != NULL_TREE);
7225 gcc_assert (use->op_p != NULL);
7226 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7227 SSA_NAME_VAR (*use->op_p),
7228 true, GSI_SAME_STMT);
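/* A typical shape of the bound replacement above (illustrative):

     for (i = 0; i < n; i++)
       *p++ = 0;

   with the pointer selected as the only iv, the exit test i < n is rewritten
   into a comparison of the pointer against a bound that is computed once on
   the preheader edge, roughly

     bound = p_0 + n;
     ...
     if (p != bound) goto latch; else goto exit;

   after which the counter i has no remaining uses and can be deleted by
   remove_unused_ivs.  */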
7231 /* Rewrite the groups using the selected induction variables. */
7233 static void
7234 rewrite_groups (struct ivopts_data *data)
7236 unsigned i, j;
7238 for (i = 0; i < data->vgroups.length (); i++)
7240 struct iv_group *group = data->vgroups[i];
7241 struct iv_cand *cand = group->selected;
7243 gcc_assert (cand);
7245 if (group->type == USE_NONLINEAR_EXPR)
7247 for (j = 0; j < group->vuses.length (); j++)
7249 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7250 update_stmt (group->vuses[j]->stmt);
7253 else if (address_p (group->type))
7255 for (j = 0; j < group->vuses.length (); j++)
7257 rewrite_use_address (data, group->vuses[j], cand);
7258 update_stmt (group->vuses[j]->stmt);
7261 else
7263 gcc_assert (group->type == USE_COMPARE);
7265 for (j = 0; j < group->vuses.length (); j++)
7267 rewrite_use_compare (data, group->vuses[j], cand);
7268 update_stmt (group->vuses[j]->stmt);
7274 /* Removes the ivs that are not used after rewriting. */
7276 static void
7277 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7279 unsigned j;
7280 bitmap_iterator bi;
7282 /* Figure out an order in which to release SSA DEFs so that we don't
7283 release something that we'd have to propagate into a debug stmt
7284 afterwards. */
7285 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7287 struct version_info *info;
7289 info = ver_info (data, j);
7290 if (info->iv
7291 && !integer_zerop (info->iv->step)
7292 && !info->inv_id
7293 && !info->iv->nonlin_use
7294 && !info->preserve_biv)
7296 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7298 tree def = info->iv->ssa_name;
7300 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7302 imm_use_iterator imm_iter;
7303 use_operand_p use_p;
7304 gimple *stmt;
7305 int count = 0;
7307 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7309 if (!gimple_debug_bind_p (stmt))
7310 continue;
7312 /* We just want to determine whether to do nothing
7313 (count == 0), to substitute the computed
7314 expression into a single use of the SSA DEF by
7315 itself (count == 1), or to use a debug temp
7316 because the SSA DEF is used multiple times or as
7317 part of a larger expression (count > 1). */
7318 count++;
7319 if (gimple_debug_bind_get_value (stmt) != def)
7320 count++;
7322 if (count > 1)
7323 BREAK_FROM_IMM_USE_STMT (imm_iter);
7326 if (!count)
7327 continue;
7329 struct iv_use dummy_use;
7330 struct iv_cand *best_cand = NULL, *cand;
7331 unsigned i, best_pref = 0, cand_pref;
7333 memset (&dummy_use, 0, sizeof (dummy_use));
7334 dummy_use.iv = info->iv;
7335 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7337 cand = data->vgroups[i]->selected;
7338 if (cand == best_cand)
7339 continue;
7340 cand_pref = operand_equal_p (cand->iv->step,
7341 info->iv->step, 0)
7342 ? 4 : 0;
7343 cand_pref
7344 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7345 == TYPE_MODE (TREE_TYPE (info->iv->base))
7346 ? 2 : 0;
7347 cand_pref
7348 += TREE_CODE (cand->iv->base) == INTEGER_CST
7349 ? 1 : 0;
7350 if (best_cand == NULL || best_pref < cand_pref)
7352 best_cand = cand;
7353 best_pref = cand_pref;
7357 if (!best_cand)
7358 continue;
7360 tree comp = get_computation_at (data->current_loop,
7361 SSA_NAME_DEF_STMT (def),
7362 &dummy_use, best_cand);
7363 if (!comp)
7364 continue;
7366 if (count > 1)
7368 tree vexpr = make_node (DEBUG_EXPR_DECL);
7369 DECL_ARTIFICIAL (vexpr) = 1;
7370 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7371 if (SSA_NAME_VAR (def))
7372 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7373 else
7374 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7375 gdebug *def_temp
7376 = gimple_build_debug_bind (vexpr, comp, NULL);
7377 gimple_stmt_iterator gsi;
7379 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7380 gsi = gsi_after_labels (gimple_bb
7381 (SSA_NAME_DEF_STMT (def)));
7382 else
7383 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7385 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7386 comp = vexpr;
7389 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7391 if (!gimple_debug_bind_p (stmt))
7392 continue;
7394 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7395 SET_USE (use_p, comp);
7397 update_stmt (stmt);
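/* Illustrative effect of the debug handling above: if the counter i was
   eliminated in favour of a pointer iv p with base p_0 and step 4, a debug
   binding of i is re-expressed in terms of the surviving candidate, via a
   debug temporary when i had several debug uses, roughly

     # DEBUG D#1 => (p_5 - p_0) / 4
     # DEBUG i => D#1

   so the debugger can still show i although no executable code computes it
   any more.  */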
7404 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7405 for hash_map::traverse. */
7407 bool
7408 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7410 free (value);
7411 return true;
7414 /* Frees data allocated by the optimization of a single loop. */
7416 static void
7417 free_loop_data (struct ivopts_data *data)
7419 unsigned i, j;
7420 bitmap_iterator bi;
7421 tree obj;
7423 if (data->niters)
7425 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7426 delete data->niters;
7427 data->niters = NULL;
7430 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7432 struct version_info *info;
7434 info = ver_info (data, i);
7435 info->iv = NULL;
7436 info->has_nonlin_use = false;
7437 info->preserve_biv = false;
7438 info->inv_id = 0;
7440 bitmap_clear (data->relevant);
7441 bitmap_clear (data->important_candidates);
7443 for (i = 0; i < data->vgroups.length (); i++)
7445 struct iv_group *group = data->vgroups[i];
7447 for (j = 0; j < group->vuses.length (); j++)
7448 free (group->vuses[j]);
7449 group->vuses.release ();
7451 BITMAP_FREE (group->related_cands);
7452 for (j = 0; j < group->n_map_members; j++)
7454 if (group->cost_map[j].inv_vars)
7455 BITMAP_FREE (group->cost_map[j].inv_vars);
7456 if (group->cost_map[j].inv_exprs)
7457 BITMAP_FREE (group->cost_map[j].inv_exprs);
7460 free (group->cost_map);
7461 free (group);
7463 data->vgroups.truncate (0);
7465 for (i = 0; i < data->vcands.length (); i++)
7467 struct iv_cand *cand = data->vcands[i];
7469 if (cand->inv_vars)
7470 BITMAP_FREE (cand->inv_vars);
7471 if (cand->inv_exprs)
7472 BITMAP_FREE (cand->inv_exprs);
7473 free (cand);
7475 data->vcands.truncate (0);
7477 if (data->version_info_size < num_ssa_names)
7479 data->version_info_size = 2 * num_ssa_names;
7480 free (data->version_info);
7481 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7484 data->max_inv_var_id = 0;
7485 data->max_inv_expr_id = 0;
7487 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7488 SET_DECL_RTL (obj, NULL_RTX);
7490 decl_rtl_to_reset.truncate (0);
7492 data->inv_expr_tab->empty ();
7494 data->iv_common_cand_tab->empty ();
7495 data->iv_common_cands.truncate (0);
7498 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7499 loop tree. */
7501 static void
7502 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7504 free_loop_data (data);
7505 free (data->version_info);
7506 BITMAP_FREE (data->relevant);
7507 BITMAP_FREE (data->important_candidates);
7509 decl_rtl_to_reset.release ();
7510 data->vgroups.release ();
7511 data->vcands.release ();
7512 delete data->inv_expr_tab;
7513 data->inv_expr_tab = NULL;
7514 free_affine_expand_cache (&data->name_expansion_cache);
7515 delete data->iv_common_cand_tab;
7516 data->iv_common_cand_tab = NULL;
7517 data->iv_common_cands.release ();
7518 obstack_free (&data->iv_obstack, NULL);
7521 /* Returns true if the loop body BODY includes any function calls. */
7523 static bool
7524 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7526 gimple_stmt_iterator gsi;
7527 unsigned i;
7529 for (i = 0; i < num_nodes; i++)
7530 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7532 gimple *stmt = gsi_stmt (gsi);
7533 if (is_gimple_call (stmt)
7534 && !gimple_call_internal_p (stmt)
7535 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7536 return true;
7538 return false;
7541 /* Determine cost scaling factor for basic blocks in loop. */
7542 #define COST_SCALING_FACTOR_BOUND (20)
7544 static void
7545 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
7547 int lfreq = data->current_loop->header->count.to_frequency (cfun);
7548 if (!data->speed || lfreq <= 0)
7549 return;
7551 int max_freq = lfreq;
7552 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7554 body[i]->aux = (void *)(intptr_t) 1;
7555 if (max_freq < body[i]->count.to_frequency (cfun))
7556 max_freq = body[i]->count.to_frequency (cfun);
7558 if (max_freq > lfreq)
7560 int divisor, factor;
7561 /* Check if scaling factor itself needs to be scaled by the bound. This
7562 is to avoid overflow when scaling cost according to profile info. */
7563 if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
7565 divisor = max_freq;
7566 factor = COST_SCALING_FACTOR_BOUND;
7568 else
7570 divisor = lfreq;
7571 factor = 1;
7573 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7575 int bfreq = body[i]->count.to_frequency (cfun);
7576 if (bfreq <= lfreq)
7577 continue;
7579 body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
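/* A worked example of the scaling above (numbers made up): with the loop
   header at lfreq = 100 and the hottest block in the body at
   max_freq = 5000, the ratio 5000 / 100 = 50 exceeds
   COST_SCALING_FACTOR_BOUND, so divisor = 5000 and factor = 20.  That
   hottest block then gets aux = 20 * 5000 / 5000 = 20 (the bound) instead
   of the raw ratio 50, and a block at frequency 1000 gets
   20 * 1000 / 5000 = 4.  The per-use costs computed later are multiplied by
   this per-block factor, so statements in hot blocks weigh more without
   risking overflow of the cost type.  */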
7584 /* Optimizes the LOOP. Returns true if anything changed. */
7586 static bool
7587 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
7588 bitmap toremove)
7590 bool changed = false;
7591 struct iv_ca *iv_ca;
7592 edge exit = single_dom_exit (loop);
7593 basic_block *body;
7595 gcc_assert (!data->niters);
7596 data->current_loop = loop;
7597 data->loop_loc = find_loop_location (loop).get_location_t ();
7598 data->speed = optimize_loop_for_speed_p (loop);
7600 if (dump_file && (dump_flags & TDF_DETAILS))
7602 fprintf (dump_file, "Processing loop %d", loop->num);
7603 if (data->loop_loc != UNKNOWN_LOCATION)
7604 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7605 LOCATION_LINE (data->loop_loc));
7606 fprintf (dump_file, "\n");
7608 if (exit)
7610 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7611 exit->src->index, exit->dest->index);
7612 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7613 fprintf (dump_file, "\n");
7616 fprintf (dump_file, "\n");
7619 body = get_loop_body (loop);
7620 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7621 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7623 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7625 /* For each ssa name determines whether it behaves as an induction variable
7626 in some loop. */
7627 if (!find_induction_variables (data))
7628 goto finish;
7630 /* Finds interesting uses (item 1). */
7631 find_interesting_uses (data);
7632 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7633 goto finish;
7635 /* Determine cost scaling factor for basic blocks in loop. */
7636 determine_scaling_factor (data, body);
7638 /* Finds candidates for the induction variables (item 2). */
7639 find_iv_candidates (data);
7641 /* Calculates the costs (item 3, part 1). */
7642 determine_iv_costs (data);
7643 determine_group_iv_costs (data);
7644 determine_set_costs (data);
7646 /* Find the optimal set of induction variables (item 3, part 2). */
7647 iv_ca = find_optimal_iv_set (data);
7648 /* Cleanup basic block aux field. */
7649 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7650 body[i]->aux = NULL;
7651 if (!iv_ca)
7652 goto finish;
7653 changed = true;
7655 /* Create the new induction variables (item 4, part 1). */
7656 create_new_ivs (data, iv_ca);
7657 iv_ca_free (&iv_ca);
7659 /* Rewrite the uses (item 4, part 2). */
7660 rewrite_groups (data);
7662 /* Remove the ivs that are unused after rewriting. */
7663 remove_unused_ivs (data, toremove);
7665 finish:
7666 free (body);
7667 free_loop_data (data);
7669 return changed;
7672 /* Main entry point. Optimizes induction variables in loops. */
7674 void
7675 tree_ssa_iv_optimize (void)
7677 struct loop *loop;
7678 struct ivopts_data data;
7679 auto_bitmap toremove;
7681 tree_ssa_iv_optimize_init (&data);
7683 /* Optimize the loops starting with the innermost ones. */
7684 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7686 if (dump_file && (dump_flags & TDF_DETAILS))
7687 flow_loop_dump (loop, dump_file, NULL, 1);
7689 tree_ssa_iv_optimize_loop (&data, loop, toremove);
7692 /* Remove eliminated IV defs. */
7693 release_defs_bitset (toremove);
7695 /* We have changed the structure of induction variables; it might happen
7696 that definitions in the scev database refer to some of them that were
7697 eliminated. */
7698 scev_reset_htab ();
7699 /* Likewise niter and control-IV information. */
7700 free_numbers_of_iterations_estimates (cfun);
7702 tree_ssa_iv_optimize_finalize (&data);
7705 #include "gt-tree-ssa-loop-ivopts.h"