gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2017 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33       Note the interesting uses are categorized and handled in groups.
  34       Generally, address type uses are grouped together if their iv bases
  35       differ only by a constant offset.
  36
  37    2) Candidates for the induction variables are found.  This includes
  38
  39       -- old induction variables
  40       -- the variables defined by expressions derived from the "interesting
  41          groups/uses" above
  42
  43    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  44       cost function assigns a cost to sets of induction variables and consists
  45       of three parts:
  46
  47       -- The group/use costs.  Each of the interesting groups/uses chooses
  48          the best induction variable in the set and adds its cost to the sum.
  49          The cost reflects the time spent on modifying the induction variables
  50          value to be usable for the given purpose (adding base and offset for
  51          arrays, etc.).
  52       -- The variable costs.  Each of the variables has a cost assigned that
  53          reflects the costs associated with incrementing the value of the
  54          variable.  The original variables are somewhat preferred.
  55       -- The set cost.  Depending on the size of the set, extra cost may be
  56          added to reflect register pressure.
  57
  58       All the costs are defined in a machine-specific way, using the target
  59       hooks and machine descriptions to determine them.
  60
  61    4) The trees are transformed to use the new variables, the dead code is
  62       removed.
  63
  64    All of this is done loop by loop.  Doing it globally is theoretically
  65    possible, it might give a better performance and it might enable us
  66    to decide costs more precisely, but getting all the interactions right
  67    would be complicated.  */
  68
  69 #include "config.h"
  70 #include "system.h"
  71 #include "coretypes.h"
  72 #include "backend.h"
  73 #include "rtl.h"
  74 #include "tree.h"
  75 #include "gimple.h"
  76 #include "cfghooks.h"
  77 #include "tree-pass.h"
  78 #include "memmodel.h"
  79 #include "tm_p.h"
  80 #include "ssa.h"
  81 #include "expmed.h"
  82 #include "insn-config.h"
  83 #include "emit-rtl.h"
  84 #include "recog.h"
  85 #include "cgraph.h"
  86 #include "gimple-pretty-print.h"
  87 #include "alias.h"
  88 #include "fold-const.h"
  89 #include "stor-layout.h"
  90 #include "tree-eh.h"
  91 #include "gimplify.h"
  92 #include "gimple-iterator.h"
  93 #include "gimplify-me.h"
  94 #include "tree-cfg.h"
  95 #include "tree-ssa-loop-ivopts.h"
  96 #include "tree-ssa-loop-manip.h"
  97 #include "tree-ssa-loop-niter.h"
  98 #include "tree-ssa-loop.h"
  99 #include "explow.h"
 100 #include "expr.h"
 101 #include "tree-dfa.h"
 102 #include "tree-ssa.h"
 103 #include "cfgloop.h"
 104 #include "tree-scalar-evolution.h"
 105 #include "params.h"
 106 #include "tree-affine.h"
 107 #include "tree-ssa-propagate.h"
 108 #include "tree-ssa-address.h"
 109 #include "builtins.h"
 110 #include "tree-vectorizer.h"
 111
 112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 113    cost of different addressing modes.  This should be moved to a TBD
 114    interface between the GIMPLE and RTL worlds.  */
 115
 116 /* The infinite cost.  */
 117 #define INFTY 10000000
 118
 119 /* Returns the expected number of loop iterations for LOOP.
 120    The average trip count is computed from profile data if it
 121    exists. */
 122
 123 static inline HOST_WIDE_INT
 124 avg_loop_niter (struct loop *loop)
 125 {
 126   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 127   if (niter == -1)
 128     {
 129       niter = likely_max_stmt_executions_int (loop);
 130
 131       if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
 132         return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
 133     }
 134
 135   return niter;
 136 }
 137
struct iv_use;

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* The memory object that the induction variable
                           points to.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  struct iv_use *nonlin_use;    /* The use recorded for this iv when it occurs
                                   in a nonlinear expression (NOTE(review):
                                   inferred from the field name -- confirm).  */
  bool biv_p;           /* Is it a biv?  */
  bool no_overflow;     /* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
                           type use.  */
};
 153
/* Per-ssa version information (induction variable descriptions, etc.).
   One entry exists per ssa version; see ver_info and name_info for the
   accessors that index the array of these entries.  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};
 164
/* Types of uses.  Every iv_use and every iv_group carries one of these.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};
 172
/* Cost of a computation.  */
struct comp_cost
{
  /* Constructs a zero cost: all three fields are 0.  */
  comp_cost (): cost (0), complexity (0), scratch (0)
  {}

  /* Constructs a cost from COST, COMPLEXITY and SCRATCH; SCRATCH is 0
     when omitted.  */
  comp_cost (int cost, unsigned complexity, int scratch = 0)
    : cost (cost), complexity (complexity), scratch (scratch)
  {}

  /* Returns true if this cost is infinite.  */
  bool infinite_cost_p ();

  /* Adds costs COST1 and COST2.  */
  friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);

  /* Adds COST to the comp_cost.  */
  comp_cost operator+= (comp_cost cost);

  /* Adds constant C to this comp_cost.  */
  comp_cost operator+= (HOST_WIDE_INT c);

  /* Subtracts constant C from this comp_cost.  */
  comp_cost operator-= (HOST_WIDE_INT c);

  /* Divide the comp_cost by constant C.  */
  comp_cost operator/= (HOST_WIDE_INT c);

  /* Multiply the comp_cost by constant C.  */
  comp_cost operator*= (HOST_WIDE_INT c);

  /* Subtracts cost COST2 from COST1.  */
  friend comp_cost operator- (comp_cost cost1, comp_cost cost2);

  /* Subtracts COST from this comp_cost.  */
  comp_cost operator-= (comp_cost cost);

  /* Returns true if COST1 is smaller than COST2.  */
  friend bool operator< (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 and COST2 are equal.  */
  friend bool operator== (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 is smaller than or equal to COST2.  */
  friend bool operator<= (comp_cost cost1, comp_cost cost2);

  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
  int scratch;          /* Scratch used during cost computation.  */
};
 226
/* The zero cost, as built by the default constructor.  */
static const comp_cost no_cost;
/* The infinite cost; every field is set to INFTY.  */
static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
 229
 230 bool
 231 comp_cost::infinite_cost_p ()
 232 {
 233   return cost == INFTY;
 234 }
 235
 236 comp_cost
 237 operator+ (comp_cost cost1, comp_cost cost2)
 238 {
 239   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
 240     return infinite_cost;
 241
 242   cost1.cost += cost2.cost;
 243   cost1.complexity += cost2.complexity;
 244
 245   return cost1;
 246 }
 247
 248 comp_cost
 249 operator- (comp_cost cost1, comp_cost cost2)
 250 {
 251   if (cost1.infinite_cost_p ())
 252     return infinite_cost;
 253
 254   gcc_assert (!cost2.infinite_cost_p ());
 255
 256   cost1.cost -= cost2.cost;
 257   cost1.complexity -= cost2.complexity;
 258
 259   return cost1;
 260 }
 261
 262 comp_cost
 263 comp_cost::operator+= (comp_cost cost)
 264 {
 265   *this = *this + cost;
 266   return *this;
 267 }
 268
 269 comp_cost
 270 comp_cost::operator+= (HOST_WIDE_INT c)
 271 {
 272   if (infinite_cost_p ())
 273     return *this;
 274
 275   this->cost += c;
 276
 277   return *this;
 278 }
 279
 280 comp_cost
 281 comp_cost::operator-= (HOST_WIDE_INT c)
 282 {
 283   if (infinite_cost_p ())
 284     return *this;
 285
 286   this->cost -= c;
 287
 288   return *this;
 289 }
 290
 291 comp_cost
 292 comp_cost::operator/= (HOST_WIDE_INT c)
 293 {
 294   if (infinite_cost_p ())
 295     return *this;
 296
 297   this->cost /= c;
 298
 299   return *this;
 300 }
 301
 302 comp_cost
 303 comp_cost::operator*= (HOST_WIDE_INT c)
 304 {
 305   if (infinite_cost_p ())
 306     return *this;
 307
 308   this->cost *= c;
 309
 310   return *this;
 311 }
 312
 313 comp_cost
 314 comp_cost::operator-= (comp_cost cost)
 315 {
 316   *this = *this - cost;
 317   return *this;
 318 }
 319
 320 bool
 321 operator< (comp_cost cost1, comp_cost cost2)
 322 {
 323   if (cost1.cost == cost2.cost)
 324     return cost1.complexity < cost2.complexity;
 325
 326   return cost1.cost < cost2.cost;
 327 }
 328
 329 bool
 330 operator== (comp_cost cost1, comp_cost cost2)
 331 {
 332   return cost1.cost == cost2.cost
 333     && cost1.complexity == cost2.complexity;
 334 }
 335
 336 bool
 337 operator<= (comp_cost cost1, comp_cost cost2)
 338 {
 339   return cost1 < cost2 || cost1 == cost2;
 340 }
 341
struct iv_inv_expr_ent;

/* The candidate - cost pair: the cost of expressing a use with one
   particular candidate, together with what has to be kept alive to do so.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  bitmap inv_vars;      /* The list of invariant ssa_vars that have to be
                           preserved when representing iv_use with iv_cand.  */
  bitmap inv_exprs;     /* The list of newly created invariant expressions
                           when representing iv_use with iv_cand.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};
 358
/* A single use of an induction variable.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  unsigned group_id;    /* The group id the use belongs to.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple *stmt;         /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */

  tree addr_base;       /* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
                        /* Const offset stripped from base address.  */
};
 373
/* Group of uses.  */
struct iv_group
{
  /* The id of the group.  */
  unsigned id;
  /* Uses of the group are of the same type.  */
  enum use_type type;
  /* The set of "related" IV candidates, plus the important ones.  */
  bitmap related_cands;
  /* Number of IV candidates in the cost_map.  */
  unsigned n_map_members;
  /* The costs with respect to the iv candidates.  */
  struct cost_pair *cost_map;
  /* The selected candidate for the group.  */
  struct iv_cand *selected;
  /* Uses in the group.  */
  vec<struct iv_use *> vuses;
};
 392
/* The position where the iv is computed (i.e. where its increment is
   placed).  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};
 402
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap inv_vars;      /* The list of invariant ssa_vars used in step of the
                           iv_cand.  */
  bitmap inv_exprs;     /* If step is more complicated than a single ssa_var,
                           handle it as a new invariant expression which will
                           be hoisted out of loop.  */
  struct iv *orig_iv;   /* The original iv if this cand is added from biv with
                           smaller type.  */
};
 430
/* Hashtable entry for common candidate derived from iv uses.  */
struct iv_common_cand
{
  /* Base of the common candidate.  */
  tree base;
  /* Step of the common candidate.  */
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<struct iv_use *> uses;
  /* Hash value of the entry; it is what iv_common_cand_hasher::hash
     returns.  */
  hashval_t hash;
};
 440
/* Hashtable helpers: hashing and equality traits for iv_common_cand
   entries.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};
 448
 449 /* Hash function for possible common candidates.  */
 450
 451 inline hashval_t
 452 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
 453 {
 454   return ccand->hash;
 455 }
 456
 457 /* Hash table equality function for common candidates.  */
 458
 459 inline bool
 460 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
 461                               const iv_common_cand *ccand2)
 462 {
 463   return (ccand1->hash == ccand2->hash
 464           && operand_equal_p (ccand1->base, ccand2->base, 0)
 465           && operand_equal_p (ccand1->step, ccand2->step, 0)
 466           && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
 467               == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
 468 }
 469
/* Loop invariant expression hashtable entry.  */

struct iv_inv_expr_ent
{
  /* Tree expression of the entry.  */
  tree expr;
  /* Unique identifier.  */
  int id;
  /* Hash value.  */
  hashval_t hash;
};
 481
 482 /* Sort iv_inv_expr_ent pair A and B by id field.  */
 483
 484 static int
 485 sort_iv_inv_expr_ent (const void *a, const void *b)
 486 {
 487   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
 488   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
 489
 490   unsigned id1 = (*e1)->id;
 491   unsigned id2 = (*e2)->id;
 492
 493   if (id1 < id2)
 494     return -1;
 495   else if (id1 > id2)
 496     return 1;
 497   else
 498     return 0;
 499 }
 500
/* Hashtable helpers: hashing and equality traits for iv_inv_expr_ent
   entries.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};
 508
 509 /* Hash function for loop invariant expressions.  */
 510
 511 inline hashval_t
 512 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 513 {
 514   return expr->hash;
 515 }
 516
 517 /* Hash table equality function for expressions.  */
 518
 519 inline bool
 520 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 521                            const iv_inv_expr_ent *expr2)
 522 {
 523   return expr1->hash == expr2->hash
 524          && operand_equal_p (expr1->expr, expr2->expr, 0);
 525 }
 526
/* State of the pass while it works on one loop: the loop itself, the
   per-ssa-name information, the uses and candidates found, and assorted
   caches and flags.  */
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  /* Source location associated with the current loop.  */
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_group *> vgroups;

  /* The candidates.  */
  vec<iv_cand *> vcands;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* The maximum invariant variable id.  */
  unsigned max_inv_var_id;

  /* The maximum invariant expression id.  */
  unsigned max_inv_expr_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  NOTE(review): the field name suggests the opposite sense (true
     meaning that all candidates are considered) -- confirm at the places
     where the flag is set and tested.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
 595
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_groups;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_group;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* The number of invariants needed, including both invariant variables and
     invariant expressions.  */
  unsigned n_invs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant variable is used.  */
  unsigned *n_inv_var_uses;

  /* Number of times each invariant expression is used.  */
  unsigned *n_inv_expr_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
 637
/* Difference of two iv candidate assignments, kept as a singly linked
   list with one node per changed group.  */

struct iv_ca_delta
{
  /* Changed group.  */
  struct iv_group *group;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next;
};
 654
/* Bound on the number of candidates below which all candidates are
   considered.  Read from PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  Read from
   PARAM_IV_MAX_CONSIDERED_USES.  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  Read from
   PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 671
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

/* Forward declaration; the definition is not part of this excerpt.  */
static comp_cost force_expr_to_var_cost (tree, bool);
 678
 679 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 680
 681 edge
 682 single_dom_exit (struct loop *loop)
 683 {
 684   edge exit = single_exit (loop);
 685
 686   if (!exit)
 687     return NULL;
 688
 689   if (!just_once_each_iteration_p (loop, exit->src))
 690     return NULL;
 691
 692   return exit;
 693 }
 694
 695 /* Dumps information about the induction variable IV to FILE.  Don't dump
 696    variable's name if DUMP_NAME is FALSE.  The information is dumped with
 697    preceding spaces indicated by INDENT_LEVEL.  */
 698
 699 void
 700 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
 701 {
 702   const char *p;
 703   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
 704
 705   if (indent_level > 4)
 706     indent_level = 4;
 707   p = spaces + 8 - (indent_level << 1);
 708
 709   fprintf (file, "%sIV struct:\n", p);
 710   if (iv->ssa_name && dump_name)
 711     {
 712       fprintf (file, "%s  SSA_NAME:\t", p);
 713       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 714       fprintf (file, "\n");
 715     }
 716
 717   fprintf (file, "%s  Type:\t", p);
 718   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 719   fprintf (file, "\n");
 720
 721   fprintf (file, "%s  Base:\t", p);
 722   print_generic_expr (file, iv->base, TDF_SLIM);
 723   fprintf (file, "\n");
 724
 725   fprintf (file, "%s  Step:\t", p);
 726   print_generic_expr (file, iv->step, TDF_SLIM);
 727   fprintf (file, "\n");
 728
 729   if (iv->base_object)
 730     {
 731       fprintf (file, "%s  Object:\t", p);
 732       print_generic_expr (file, iv->base_object, TDF_SLIM);
 733       fprintf (file, "\n");
 734     }
 735
 736   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
 737
 738   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
 739            p, iv->no_overflow ? "No-overflow" : "Overflow");
 740 }
 741
 742 /* Dumps information about the USE to FILE.  */
 743
 744 void
 745 dump_use (FILE *file, struct iv_use *use)
 746 {
 747   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
 748   fprintf (file, "    At stmt:\t");
 749   print_gimple_stmt (file, use->stmt, 0);
 750   fprintf (file, "    At pos:\t");
 751   if (use->op_p)
 752     print_generic_expr (file, *use->op_p, TDF_SLIM);
 753   fprintf (file, "\n");
 754   dump_iv (file, use->iv, false, 2);
 755 }
 756
 757 /* Dumps information about the uses to FILE.  */
 758
 759 void
 760 dump_groups (FILE *file, struct ivopts_data *data)
 761 {
 762   unsigned i, j;
 763   struct iv_group *group;
 764
 765   for (i = 0; i < data->vgroups.length (); i++)
 766     {
 767       group = data->vgroups[i];
 768       fprintf (file, "Group %d:\n", group->id);
 769       if (group->type == USE_NONLINEAR_EXPR)
 770         fprintf (file, "  Type:\tGENERIC\n");
 771       else if (group->type == USE_ADDRESS)
 772         fprintf (file, "  Type:\tADDRESS\n");
 773       else
 774         {
 775           gcc_assert (group->type == USE_COMPARE);
 776           fprintf (file, "  Type:\tCOMPARE\n");
 777         }
 778       for (j = 0; j < group->vuses.length (); j++)
 779         dump_use (file, group->vuses[j]);
 780     }
 781 }
 782
 783 /* Dumps information about induction variable candidate CAND to FILE.  */
 784
 785 void
 786 dump_cand (FILE *file, struct iv_cand *cand)
 787 {
 788   struct iv *iv = cand->iv;
 789
 790   fprintf (file, "Candidate %d:\n", cand->id);
 791   if (cand->inv_vars)
 792     {
 793       fprintf (file, "  Depend on inv.vars: ");
 794       dump_bitmap (file, cand->inv_vars);
 795     }
 796   if (cand->inv_exprs)
 797     {
 798       fprintf (file, "  Depend on inv.exprs: ");
 799       dump_bitmap (file, cand->inv_exprs);
 800     }
 801
 802   if (cand->var_before)
 803     {
 804       fprintf (file, "  Var befor: ");
 805       print_generic_expr (file, cand->var_before, TDF_SLIM);
 806       fprintf (file, "\n");
 807     }
 808   if (cand->var_after)
 809     {
 810       fprintf (file, "  Var after: ");
 811       print_generic_expr (file, cand->var_after, TDF_SLIM);
 812       fprintf (file, "\n");
 813     }
 814
 815   switch (cand->pos)
 816     {
 817     case IP_NORMAL:
 818       fprintf (file, "  Incr POS: before exit test\n");
 819       break;
 820
 821     case IP_BEFORE_USE:
 822       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
 823       break;
 824
 825     case IP_AFTER_USE:
 826       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
 827       break;
 828
 829     case IP_END:
 830       fprintf (file, "  Incr POS: at end\n");
 831       break;
 832
 833     case IP_ORIGINAL:
 834       fprintf (file, "  Incr POS: orig biv\n");
 835       break;
 836     }
 837
 838   dump_iv (file, iv, false, 1);
 839 }
 840
 841 /* Returns the info for ssa version VER.  */
 842
 843 static inline struct version_info *
 844 ver_info (struct ivopts_data *data, unsigned ver)
 845 {
 846   return data->version_info + ver;
 847 }
 848
 849 /* Returns the info for ssa name NAME.  */
 850
 851 static inline struct version_info *
 852 name_info (struct ivopts_data *data, tree name)
 853 {
 854   return ver_info (data, SSA_NAME_VERSION (name));
 855 }
 856
 857 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 858    emitted in LOOP.  */
 859
 860 static bool
 861 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
 862 {
 863   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 864
 865   gcc_assert (bb);
 866
 867   if (sbb == loop->latch)
 868     return true;
 869
 870   if (sbb != bb)
 871     return false;
 872
 873   return stmt == last_stmt (bb);
 874 }
 875
 876 /* Returns true if STMT if after the place where the original induction
 877    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 878    if the positions are identical.  */
 879
 880 static bool
 881 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
 882 {
 883   basic_block cand_bb = gimple_bb (cand->incremented_at);
 884   basic_block stmt_bb = gimple_bb (stmt);
 885
 886   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 887     return false;
 888
 889   if (stmt_bb != cand_bb)
 890     return true;
 891
 892   if (true_if_equal
 893       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 894     return true;
 895   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 896 }
 897
 898 /* Returns true if STMT if after the place where the induction variable
 899    CAND is incremented in LOOP.  */
 900
 901 static bool
 902 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
 903 {
 904   switch (cand->pos)
 905     {
 906     case IP_END:
 907       return false;
 908
 909     case IP_NORMAL:
 910       return stmt_after_ip_normal_pos (loop, stmt);
 911
 912     case IP_ORIGINAL:
 913     case IP_AFTER_USE:
 914       return stmt_after_inc_pos (cand, stmt, false);
 915
 916     case IP_BEFORE_USE:
 917       return stmt_after_inc_pos (cand, stmt, true);
 918
 919     default:
 920       gcc_unreachable ();
 921     }
 922 }
 923
 924 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 925
 926 static bool
 927 abnormal_ssa_name_p (tree exp)
 928 {
 929   if (!exp)
 930     return false;
 931
 932   if (TREE_CODE (exp) != SSA_NAME)
 933     return false;
 934
 935   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 936 }
 937
 938 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 939    abnormal phi node.  Callback for for_each_index.  */
 940
 941 static bool
 942 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 943                                   void *data ATTRIBUTE_UNUSED)
 944 {
 945   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 946     {
 947       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 948         return false;
 949       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 950         return false;
 951     }
 952
 953   return !abnormal_ssa_name_p (*index);
 954 }
 955
 956 /* Returns true if EXPR contains a ssa name that occurs in an
 957    abnormal phi node.  */
 958
 959 bool
 960 contains_abnormal_ssa_name_p (tree expr)
 961 {
 962   enum tree_code code;
 963   enum tree_code_class codeclass;
 964
 965   if (!expr)
 966     return false;
 967
 968   code = TREE_CODE (expr);
 969   codeclass = TREE_CODE_CLASS (code);
 970
 971   if (code == SSA_NAME)
 972     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 973
 974   if (code == INTEGER_CST
 975       || is_gimple_min_invariant (expr))
 976     return false;
 977
 978   if (code == ADDR_EXPR)
 979     return !for_each_index (&TREE_OPERAND (expr, 0),
 980                             idx_contains_abnormal_ssa_name_p,
 981                             NULL);
 982
 983   if (code == COND_EXPR)
 984     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 985       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 986       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 987
 988   switch (codeclass)
 989     {
 990     case tcc_binary:
 991     case tcc_comparison:
 992       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 993         return true;
 994
 995       /* Fallthru.  */
 996     case tcc_unary:
 997       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 998         return true;
 999
1000       break;
1001
1002     default:
1003       gcc_unreachable ();
1004     }
1005
1006   return false;
1007 }
1008
1009 /*  Returns the structure describing number of iterations determined from
1010     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1011
1012 static struct tree_niter_desc *
1013 niter_for_exit (struct ivopts_data *data, edge exit)
1014 {
1015   struct tree_niter_desc *desc;
1016   tree_niter_desc **slot;
1017
1018   if (!data->niters)
1019     {
1020       data->niters = new hash_map<edge, tree_niter_desc *>;
1021       slot = NULL;
1022     }
1023   else
1024     slot = data->niters->get (exit);
1025
1026   if (!slot)
1027     {
1028       /* Try to determine number of iterations.  We cannot safely work with ssa
1029          names that appear in phi nodes on abnormal edges, so that we do not
1030          create overlapping life ranges for them (PR 27283).  */
1031       desc = XNEW (struct tree_niter_desc);
1032       if (!number_of_iterations_exit (data->current_loop,
1033                                       exit, desc, true)
1034           || contains_abnormal_ssa_name_p (desc->niter))
1035         {
1036           XDELETE (desc);
1037           desc = NULL;
1038         }
1039       data->niters->put (exit, desc);
1040     }
1041   else
1042     desc = *slot;
1043
1044   return desc;
1045 }
1046
1047 /* Returns the structure describing number of iterations determined from
1048    single dominating exit of DATA->current_loop, or NULL if something
1049    goes wrong.  */
1050
1051 static struct tree_niter_desc *
1052 niter_for_single_dom_exit (struct ivopts_data *data)
1053 {
1054   edge exit = single_dom_exit (data->current_loop);
1055
1056   if (!exit)
1057     return NULL;
1058
1059   return niter_for_exit (data, exit);
1060 }
1061
1062 /* Initializes data structures used by the iv optimization pass, stored
1063    in DATA.  */
1064
1065 static void
1066 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1067 {
1068   data->version_info_size = 2 * num_ssa_names;
1069   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1070   data->relevant = BITMAP_ALLOC (NULL);
1071   data->important_candidates = BITMAP_ALLOC (NULL);
1072   data->max_inv_var_id = 0;
1073   data->max_inv_expr_id = 0;
1074   data->niters = NULL;
1075   data->vgroups.create (20);
1076   data->vcands.create (20);
1077   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1078   data->name_expansion_cache = NULL;
1079   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1080   data->iv_common_cands.create (20);
1081   decl_rtl_to_reset.create (20);
1082   gcc_obstack_init (&data->iv_obstack);
1083 }
1084
1085 /* Returns a memory object to that EXPR points.  In case we are able to
1086    determine that it does not point to any such object, NULL is returned.  */
1087
1088 static tree
1089 determine_base_object (tree expr)
1090 {
1091   enum tree_code code = TREE_CODE (expr);
1092   tree base, obj;
1093
1094   /* If this is a pointer casted to any type, we need to determine
1095      the base object for the pointer; so handle conversions before
1096      throwing away non-pointer expressions.  */
1097   if (CONVERT_EXPR_P (expr))
1098     return determine_base_object (TREE_OPERAND (expr, 0));
1099
1100   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1101     return NULL_TREE;
1102
1103   switch (code)
1104     {
1105     case INTEGER_CST:
1106       return NULL_TREE;
1107
1108     case ADDR_EXPR:
1109       obj = TREE_OPERAND (expr, 0);
1110       base = get_base_address (obj);
1111
1112       if (!base)
1113         return expr;
1114
1115       if (TREE_CODE (base) == MEM_REF)
1116         return determine_base_object (TREE_OPERAND (base, 0));
1117
1118       return fold_convert (ptr_type_node,
1119                            build_fold_addr_expr (base));
1120
1121     case POINTER_PLUS_EXPR:
1122       return determine_base_object (TREE_OPERAND (expr, 0));
1123
1124     case PLUS_EXPR:
1125     case MINUS_EXPR:
1126       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
1127       gcc_unreachable ();
1128
1129     default:
1130       return fold_convert (ptr_type_node, expr);
1131     }
1132 }
1133
1134 /* Return true if address expression with non-DECL_P operand appears
1135    in EXPR.  */
1136
1137 static bool
1138 contain_complex_addr_expr (tree expr)
1139 {
1140   bool res = false;
1141
1142   STRIP_NOPS (expr);
1143   switch (TREE_CODE (expr))
1144     {
1145     case POINTER_PLUS_EXPR:
1146     case PLUS_EXPR:
1147     case MINUS_EXPR:
1148       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1149       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1150       break;
1151
1152     case ADDR_EXPR:
1153       return (!DECL_P (TREE_OPERAND (expr, 0)));
1154
1155     default:
1156       return false;
1157     }
1158
1159   return res;
1160 }
1161
1162 /* Allocates an induction variable with given initial value BASE and step STEP
1163    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1164
1165 static struct iv *
1166 alloc_iv (struct ivopts_data *data, tree base, tree step,
1167           bool no_overflow = false)
1168 {
1169   tree expr = base;
1170   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1171                                               sizeof (struct iv));
1172   gcc_assert (step != NULL_TREE);
1173
1174   /* Lower address expression in base except ones with DECL_P as operand.
1175      By doing this:
1176        1) More accurate cost can be computed for address expressions;
1177        2) Duplicate candidates won't be created for bases in different
1178           forms, like &a[0] and &a.  */
1179   STRIP_NOPS (expr);
1180   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1181       || contain_complex_addr_expr (expr))
1182     {
1183       aff_tree comb;
1184       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1185       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1186     }
1187
1188   iv->base = base;
1189   iv->base_object = determine_base_object (base);
1190   iv->step = step;
1191   iv->biv_p = false;
1192   iv->nonlin_use = NULL;
1193   iv->ssa_name = NULL_TREE;
1194   if (!no_overflow
1195        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1196                               base, step))
1197     no_overflow = true;
1198   iv->no_overflow = no_overflow;
1199   iv->have_address_use = false;
1200
1201   return iv;
1202 }
1203
1204 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1205    doesn't overflow.  */
1206
1207 static void
1208 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1209         bool no_overflow)
1210 {
1211   struct version_info *info = name_info (data, iv);
1212
1213   gcc_assert (!info->iv);
1214
1215   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1216   info->iv = alloc_iv (data, base, step, no_overflow);
1217   info->iv->ssa_name = iv;
1218 }
1219
1220 /* Finds induction variable declaration for VAR.  */
1221
1222 static struct iv *
1223 get_iv (struct ivopts_data *data, tree var)
1224 {
1225   basic_block bb;
1226   tree type = TREE_TYPE (var);
1227
1228   if (!POINTER_TYPE_P (type)
1229       && !INTEGRAL_TYPE_P (type))
1230     return NULL;
1231
1232   if (!name_info (data, var)->iv)
1233     {
1234       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1235
1236       if (!bb
1237           || !flow_bb_inside_loop_p (data->current_loop, bb))
1238         set_iv (data, var, var, build_int_cst (type, 0), true);
1239     }
1240
1241   return name_info (data, var)->iv;
1242 }
1243
1244 /* Return the first non-invariant ssa var found in EXPR.  */
1245
1246 static tree
1247 extract_single_var_from_expr (tree expr)
1248 {
1249   int i, n;
1250   tree tmp;
1251   enum tree_code code;
1252
1253   if (!expr || is_gimple_min_invariant (expr))
1254     return NULL;
1255
1256   code = TREE_CODE (expr);
1257   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1258     {
1259       n = TREE_OPERAND_LENGTH (expr);
1260       for (i = 0; i < n; i++)
1261         {
1262           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1263
1264           if (tmp)
1265             return tmp;
1266         }
1267     }
1268   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1269 }
1270
1271 /* Finds basic ivs.  */
1272
1273 static bool
1274 find_bivs (struct ivopts_data *data)
1275 {
1276   gphi *phi;
1277   affine_iv iv;
1278   tree step, type, base, stop;
1279   bool found = false;
1280   struct loop *loop = data->current_loop;
1281   gphi_iterator psi;
1282
1283   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1284     {
1285       phi = psi.phi ();
1286
1287       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1288         continue;
1289
1290       if (virtual_operand_p (PHI_RESULT (phi)))
1291         continue;
1292
1293       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1294         continue;
1295
1296       if (integer_zerop (iv.step))
1297         continue;
1298
1299       step = iv.step;
1300       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1301       /* Stop expanding iv base at the first ssa var referred by iv step.
1302          Ideally we should stop at any ssa var, because that's expensive
1303          and unusual to happen, we just do it on the first one.
1304
1305          See PR64705 for the rationale.  */
1306       stop = extract_single_var_from_expr (step);
1307       base = expand_simple_operations (base, stop);
1308       if (contains_abnormal_ssa_name_p (base)
1309           || contains_abnormal_ssa_name_p (step))
1310         continue;
1311
1312       type = TREE_TYPE (PHI_RESULT (phi));
1313       base = fold_convert (type, base);
1314       if (step)
1315         {
1316           if (POINTER_TYPE_P (type))
1317             step = convert_to_ptrofftype (step);
1318           else
1319             step = fold_convert (type, step);
1320         }
1321
1322       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1323       found = true;
1324     }
1325
1326   return found;
1327 }
1328
1329 /* Marks basic ivs.  */
1330
1331 static void
1332 mark_bivs (struct ivopts_data *data)
1333 {
1334   gphi *phi;
1335   gimple *def;
1336   tree var;
1337   struct iv *iv, *incr_iv;
1338   struct loop *loop = data->current_loop;
1339   basic_block incr_bb;
1340   gphi_iterator psi;
1341
1342   data->bivs_not_used_in_addr = 0;
1343   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1344     {
1345       phi = psi.phi ();
1346
1347       iv = get_iv (data, PHI_RESULT (phi));
1348       if (!iv)
1349         continue;
1350
1351       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1352       def = SSA_NAME_DEF_STMT (var);
1353       /* Don't mark iv peeled from other one as biv.  */
1354       if (def
1355           && gimple_code (def) == GIMPLE_PHI
1356           && gimple_bb (def) == loop->header)
1357         continue;
1358
1359       incr_iv = get_iv (data, var);
1360       if (!incr_iv)
1361         continue;
1362
1363       /* If the increment is in the subloop, ignore it.  */
1364       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1365       if (incr_bb->loop_father != data->current_loop
1366           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1367         continue;
1368
1369       iv->biv_p = true;
1370       incr_iv->biv_p = true;
1371       if (iv->no_overflow)
1372         data->bivs_not_used_in_addr++;
1373       if (incr_iv->no_overflow)
1374         data->bivs_not_used_in_addr++;
1375     }
1376 }
1377
1378 /* Checks whether STMT defines a linear induction variable and stores its
1379    parameters to IV.  */
1380
1381 static bool
1382 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1383 {
1384   tree lhs, stop;
1385   struct loop *loop = data->current_loop;
1386
1387   iv->base = NULL_TREE;
1388   iv->step = NULL_TREE;
1389
1390   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1391     return false;
1392
1393   lhs = gimple_assign_lhs (stmt);
1394   if (TREE_CODE (lhs) != SSA_NAME)
1395     return false;
1396
1397   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1398     return false;
1399
1400   /* Stop expanding iv base at the first ssa var referred by iv step.
1401      Ideally we should stop at any ssa var, because that's expensive
1402      and unusual to happen, we just do it on the first one.
1403
1404      See PR64705 for the rationale.  */
1405   stop = extract_single_var_from_expr (iv->step);
1406   iv->base = expand_simple_operations (iv->base, stop);
1407   if (contains_abnormal_ssa_name_p (iv->base)
1408       || contains_abnormal_ssa_name_p (iv->step))
1409     return false;
1410
1411   /* If STMT could throw, then do not consider STMT as defining a GIV.
1412      While this will suppress optimizations, we can not safely delete this
1413      GIV and associated statements, even if it appears it is not used.  */
1414   if (stmt_could_throw_p (stmt))
1415     return false;
1416
1417   return true;
1418 }
1419
1420 /* Finds general ivs in statement STMT.  */
1421
1422 static void
1423 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1424 {
1425   affine_iv iv;
1426
1427   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1428     return;
1429
1430   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1431 }
1432
1433 /* Finds general ivs in basic block BB.  */
1434
1435 static void
1436 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1437 {
1438   gimple_stmt_iterator bsi;
1439
1440   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1441     find_givs_in_stmt (data, gsi_stmt (bsi));
1442 }
1443
1444 /* Finds general ivs.  */
1445
1446 static void
1447 find_givs (struct ivopts_data *data)
1448 {
1449   struct loop *loop = data->current_loop;
1450   basic_block *body = get_loop_body_in_dom_order (loop);
1451   unsigned i;
1452
1453   for (i = 0; i < loop->num_nodes; i++)
1454     find_givs_in_bb (data, body[i]);
1455   free (body);
1456 }
1457
1458 /* For each ssa name defined in LOOP determines whether it is an induction
1459    variable and if so, its initial value and step.  */
1460
1461 static bool
1462 find_induction_variables (struct ivopts_data *data)
1463 {
1464   unsigned i;
1465   bitmap_iterator bi;
1466
1467   if (!find_bivs (data))
1468     return false;
1469
1470   find_givs (data);
1471   mark_bivs (data);
1472
1473   if (dump_file && (dump_flags & TDF_DETAILS))
1474     {
1475       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1476
1477       if (niter)
1478         {
1479           fprintf (dump_file, "  number of iterations ");
1480           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1481           if (!integer_zerop (niter->may_be_zero))
1482             {
1483               fprintf (dump_file, "; zero if ");
1484               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1485             }
1486           fprintf (dump_file, "\n");
1487         };
1488
1489       fprintf (dump_file, "\n<Induction Vars>:\n");
1490       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1491         {
1492           struct version_info *info = ver_info (data, i);
1493           if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1494             dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1495         }
1496     }
1497
1498   return true;
1499 }
1500
1501 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1502    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1503    is the const offset stripped from IV base; for other types use, both
1504    are zero by default.  */
1505
1506 static struct iv_use *
1507 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1508             gimple *stmt, enum use_type type, tree addr_base,
1509             unsigned HOST_WIDE_INT addr_offset)
1510 {
1511   struct iv_use *use = XCNEW (struct iv_use);
1512
1513   use->id = group->vuses.length ();
1514   use->group_id = group->id;
1515   use->type = type;
1516   use->iv = iv;
1517   use->stmt = stmt;
1518   use->op_p = use_p;
1519   use->addr_base = addr_base;
1520   use->addr_offset = addr_offset;
1521
1522   group->vuses.safe_push (use);
1523   return use;
1524 }
1525
1526 /* Checks whether OP is a loop-level invariant and if so, records it.
1527    NONLINEAR_USE is true if the invariant is used in a way we do not
1528    handle specially.  */
1529
1530 static void
1531 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1532 {
1533   basic_block bb;
1534   struct version_info *info;
1535
1536   if (TREE_CODE (op) != SSA_NAME
1537       || virtual_operand_p (op))
1538     return;
1539
1540   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1541   if (bb
1542       && flow_bb_inside_loop_p (data->current_loop, bb))
1543     return;
1544
1545   info = name_info (data, op);
1546   info->name = op;
1547   info->has_nonlin_use |= nonlinear_use;
1548   if (!info->inv_id)
1549     info->inv_id = ++data->max_inv_var_id;
1550   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1551 }
1552
1553 static tree
1554 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1555
1556 /* Record a group of TYPE.  */
1557
1558 static struct iv_group *
1559 record_group (struct ivopts_data *data, enum use_type type)
1560 {
1561   struct iv_group *group = XCNEW (struct iv_group);
1562
1563   group->id = data->vgroups.length ();
1564   group->type = type;
1565   group->related_cands = BITMAP_ALLOC (NULL);
1566   group->vuses.create (1);
1567
1568   data->vgroups.safe_push (group);
1569   return group;
1570 }
1571
1572 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1573    New group will be created if there is no existing group for the use.  */
1574
1575 static struct iv_use *
1576 record_group_use (struct ivopts_data *data, tree *use_p,
1577                   struct iv *iv, gimple *stmt, enum use_type type)
1578 {
1579   tree addr_base = NULL;
1580   struct iv_group *group = NULL;
1581   unsigned HOST_WIDE_INT addr_offset = 0;
1582
1583   /* Record non address type use in a new group.  */
1584   if (type == USE_ADDRESS && iv->base_object)
1585     {
1586       unsigned int i;
1587
1588       addr_base = strip_offset (iv->base, &addr_offset);
1589       for (i = 0; i < data->vgroups.length (); i++)
1590         {
1591           struct iv_use *use;
1592
1593           group = data->vgroups[i];
1594           use = group->vuses[0];
1595           if (use->type != USE_ADDRESS || !use->iv->base_object)
1596             continue;
1597
1598           /* Check if it has the same stripped base and step.  */
1599           if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1600               && operand_equal_p (iv->step, use->iv->step, 0)
1601               && operand_equal_p (addr_base, use->addr_base, 0))
1602             break;
1603         }
1604       if (i == data->vgroups.length ())
1605         group = NULL;
1606     }
1607
1608   if (!group)
1609     group = record_group (data, type);
1610
1611   return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
1612 }
1613
1614 /* Checks whether the use OP is interesting and if so, records it.  */
1615
1616 static struct iv_use *
1617 find_interesting_uses_op (struct ivopts_data *data, tree op)
1618 {
1619   struct iv *iv;
1620   gimple *stmt;
1621   struct iv_use *use;
1622
1623   if (TREE_CODE (op) != SSA_NAME)
1624     return NULL;
1625
1626   iv = get_iv (data, op);
1627   if (!iv)
1628     return NULL;
1629
1630   if (iv->nonlin_use)
1631     {
1632       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1633       return iv->nonlin_use;
1634     }
1635
1636   if (integer_zerop (iv->step))
1637     {
1638       record_invariant (data, op, true);
1639       return NULL;
1640     }
1641
1642   stmt = SSA_NAME_DEF_STMT (op);
1643   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1644
1645   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1646   iv->nonlin_use = use;
1647   return use;
1648 }
1649
/* Indicate how compare type iv_use can be handled.  This is the result of
   extract_cond_operands.  */
enum comp_iv_rewrite
{
  /* Neither side of the comparison is an iv with nonzero step; the
     operands are then treated as ordinary uses by
     find_interesting_uses_cond rather than as a compare type iv_use.  */
  COMP_IV_NA,
  /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
  COMP_IV_EXPR,
  /* We may rewrite compare type iv_uses on both sides of comparison by
     expressing value of each iv_use.  */
  COMP_IV_EXPR_2,
  /* We may rewrite compare type iv_use by expressing value of the iv_use
     or by eliminating it with other iv_cand.  */
  COMP_IV_ELIM
};
1663
/* Given a condition in statement STMT, classifies it according to which
   of its two operands are induction variables with nonzero step:

     COMP_IV_EXPR_2  both operands are such ivs;
     COMP_IV_ELIM    exactly one operand is such an iv and the other one
                     has an iv description with zero step (i.e. it is
                     invariant or constant);
     COMP_IV_EXPR    exactly one operand is such an iv and no iv
                     description is available for the other one;
     COMP_IV_NA      neither operand is such an iv.

   Each of CONTROL_VAR, BOUND, IV_VAR and IV_BOUND may be NULL; those that
   are not are always set.  CONTROL_VAR and IV_VAR receive the location and
   iv description of the first operand, BOUND and IV_BOUND those of the
   second one, except that for COMP_IV_ELIM and COMP_IV_EXPR the operands
   are swapped when necessary so that the iv comes first.  Operands that
   are not SSA names are described by a shared static iv with zero step.

   STMT is either a GIMPLE_COND or, presumably, an assignment with two rhs
   operands -- the latter is not checked here.  */

static enum comp_iv_rewrite
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  enum comp_iv_rewrite rewrite_type = COMP_IV_NA;

  /* Fetch the locations of both operands of the comparison.  */
  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  /* (Re)initialize the static placeholders on every call.  */
  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  /* Non-SSA operands keep the zero-step placeholder; for SSA names get_iv
     may return NULL.  */
  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* If both sides of comparison are IVs.  We can express ivs on both end.  */
  if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
    {
      rewrite_type = COMP_IV_EXPR_2;
      goto end;
    }

  /* If none side of comparison is IV.  */
  if ((!iv0 || integer_zerop (iv0->step))
      && (!iv1 || integer_zerop (iv1->step)))
    goto end;

  /* Control variable may be on the other side.  */
  if (!iv0 || integer_zerop (iv0->step))
    {
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  /* If one side is IV and the other side isn't loop invariant.  */
  if (!iv1)
    rewrite_type = COMP_IV_EXPR;
  /* If one side is IV and the other side is loop invariant.  */
  else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
    rewrite_type = COMP_IV_ELIM;

end:
  /* Hand back whatever the caller asked for.  */
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return rewrite_type;
}
1741
1742 /* Checks whether the condition in STMT is interesting and if so,
1743    records it.  */
1744
1745 static void
1746 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1747 {
1748   tree *var_p, *bound_p;
1749   struct iv *var_iv, *bound_iv;
1750   enum comp_iv_rewrite ret;
1751
1752   ret = extract_cond_operands (data, stmt,
1753                                &var_p, &bound_p, &var_iv, &bound_iv);
1754   if (ret == COMP_IV_NA)
1755     {
1756       find_interesting_uses_op (data, *var_p);
1757       find_interesting_uses_op (data, *bound_p);
1758       return;
1759     }
1760
1761   record_group_use (data, var_p, var_iv, stmt, USE_COMPARE);
1762   /* Record compare type iv_use for iv on the other side of comparison.  */
1763   if (ret == COMP_IV_EXPR_2)
1764     record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE);
1765 }
1766
1767 /* Returns the outermost loop EXPR is obviously invariant in
1768    relative to the loop LOOP, i.e. if all its operands are defined
1769    outside of the returned loop.  Returns NULL if EXPR is not
1770    even obviously invariant in LOOP.  */
1771
1772 struct loop *
1773 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1774 {
1775   basic_block def_bb;
1776   unsigned i, len;
1777
1778   if (is_gimple_min_invariant (expr))
1779     return current_loops->tree_root;
1780
1781   if (TREE_CODE (expr) == SSA_NAME)
1782     {
1783       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1784       if (def_bb)
1785         {
1786           if (flow_bb_inside_loop_p (loop, def_bb))
1787             return NULL;
1788           return superloop_at_depth (loop,
1789                                      loop_depth (def_bb->loop_father) + 1);
1790         }
1791
1792       return current_loops->tree_root;
1793     }
1794
1795   if (!EXPR_P (expr))
1796     return NULL;
1797
1798   unsigned maxdepth = 0;
1799   len = TREE_OPERAND_LENGTH (expr);
1800   for (i = 0; i < len; i++)
1801     {
1802       struct loop *ivloop;
1803       if (!TREE_OPERAND (expr, i))
1804         continue;
1805
1806       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1807       if (!ivloop)
1808         return NULL;
1809       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1810     }
1811
1812   return superloop_at_depth (loop, maxdepth);
1813 }
1814
1815 /* Returns true if expression EXPR is obviously invariant in LOOP,
1816    i.e. if all its operands are defined outside of the LOOP.  LOOP
1817    should not be the function body.  */
1818
1819 bool
1820 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1821 {
1822   basic_block def_bb;
1823   unsigned i, len;
1824
1825   gcc_assert (loop_depth (loop) > 0);
1826
1827   if (is_gimple_min_invariant (expr))
1828     return true;
1829
1830   if (TREE_CODE (expr) == SSA_NAME)
1831     {
1832       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1833       if (def_bb
1834           && flow_bb_inside_loop_p (loop, def_bb))
1835         return false;
1836
1837       return true;
1838     }
1839
1840   if (!EXPR_P (expr))
1841     return false;
1842
1843   len = TREE_OPERAND_LENGTH (expr);
1844   for (i = 0; i < len; i++)
1845     if (TREE_OPERAND (expr, i)
1846         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1847       return false;
1848
1849   return true;
1850 }
1851
/* Given expression EXPR which computes inductive values with respect
   to loop recorded in DATA, this function returns biv from which EXPR
   is derived by tracing definition chains of ssa variables in EXPR.

   The search is depth first: operands of EXPR are tried from left to
   right, then, if EXPR is an SSA name whose iv is not itself a biv, the
   operands of its defining statement.  The first biv found is returned;
   NULL is returned if EXPR is NULL_TREE or invariant, or if no biv is
   reached.  */

static struct iv*
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
  struct iv *iv;
  unsigned i, n;
  tree e2, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return NULL;

  if (is_gimple_min_invariant (expr))
    return NULL;

  /* For a compound expression, look into each operand in turn.  */
  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
        {
          iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
          if (iv)
            return iv;
        }
    }

  /* Stop if it's not ssa name.  */
  if (code != SSA_NAME)
    return NULL;

  /* Names without an iv description and names with zero step do not derive
     from any biv; a biv is its own answer.  */
  iv = get_iv (data, expr);
  if (!iv || integer_zerop (iv->step))
    return NULL;
  else if (iv->biv_p)
    return iv;

  /* Otherwise continue with the statement defining EXPR.  */
  stmt = SSA_NAME_DEF_STMT (expr);
  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      ssa_op_iter iter;
      use_operand_p use_p;
      basic_block phi_bb = gimple_bb (phi);

      /* Skip loop header PHI that doesn't define biv.  Note the test
         rejects every PHI whose block has the current loop as its loop
         father.  */
      if (phi_bb->loop_father == data->current_loop)
        return NULL;

      if (virtual_operand_p (gimple_phi_result (phi)))
        return NULL;

      /* Try the PHI arguments one after another.  */
      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
        {
          tree use = USE_FROM_PTR (use_p);
          iv = find_deriving_biv_for_expr (data, use);
          if (iv)
            return iv;
        }
      return NULL;
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return NULL;

  /* A plain copy (single rhs): follow the only operand.  */
  e1 = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    return find_deriving_biv_for_expr (data, e1);

  switch (code)
    {
    case MULT_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      /* Increments, decrements and multiplications by a constant
         are simple.  The second operand is searched before the first
         one, which is handled by the fallthrough below.  */
      e2 = gimple_assign_rhs2 (stmt);
      iv = find_deriving_biv_for_expr (data, e2);
      if (iv)
        return iv;
      gcc_fallthrough ();

    CASE_CONVERT:
      /* Casts are simple.  */
      return find_deriving_biv_for_expr (data, e1);

    default:
      break;
    }

  /* Any other operation is not traced through.  */
  return NULL;
}
1948
1949 /* Record BIV, its predecessor and successor that they are used in
1950    address type uses.  */
1951
1952 static void
1953 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1954 {
1955   unsigned i;
1956   tree type, base_1, base_2;
1957   bitmap_iterator bi;
1958
1959   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1960       || biv->have_address_use || !biv->no_overflow)
1961     return;
1962
1963   type = TREE_TYPE (biv->base);
1964   if (!INTEGRAL_TYPE_P (type))
1965     return;
1966
1967   biv->have_address_use = true;
1968   data->bivs_not_used_in_addr--;
1969   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1970   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1971     {
1972       struct iv *iv = ver_info (data, i)->iv;
1973
1974       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1975           || iv->have_address_use || !iv->no_overflow)
1976         continue;
1977
1978       if (type != TREE_TYPE (iv->base)
1979           || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1980         continue;
1981
1982       if (!operand_equal_p (biv->step, iv->step, 0))
1983         continue;
1984
1985       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1986       if (operand_equal_p (base_1, iv->base, 0)
1987           || operand_equal_p (base_2, biv->base, 0))
1988         {
1989           iv->have_address_use = true;
1990           data->bivs_not_used_in_addr--;
1991         }
1992     }
1993 }
1994
1995 /* Cumulates the steps of indices into DATA and replaces their values with the
1996    initial ones.  Returns false when the value of the index cannot be determined.
1997    Callback for for_each_index.  */
1998
1999 struct ifs_ivopts_data
2000 {
2001   struct ivopts_data *ivopts_data;
2002   gimple *stmt;
2003   tree step;
2004 };
2005
2006 static bool
2007 idx_find_step (tree base, tree *idx, void *data)
2008 {
2009   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2010   struct iv *iv;
2011   bool use_overflow_semantics = false;
2012   tree step, iv_base, iv_step, lbound, off;
2013   struct loop *loop = dta->ivopts_data->current_loop;
2014
2015   /* If base is a component ref, require that the offset of the reference
2016      be invariant.  */
2017   if (TREE_CODE (base) == COMPONENT_REF)
2018     {
2019       off = component_ref_field_offset (base);
2020       return expr_invariant_in_loop_p (loop, off);
2021     }
2022
2023   /* If base is array, first check whether we will be able to move the
2024      reference out of the loop (in order to take its address in strength
2025      reduction).  In order for this to work we need both lower bound
2026      and step to be loop invariants.  */
2027   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2028     {
2029       /* Moreover, for a range, the size needs to be invariant as well.  */
2030       if (TREE_CODE (base) == ARRAY_RANGE_REF
2031           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2032         return false;
2033
2034       step = array_ref_element_size (base);
2035       lbound = array_ref_low_bound (base);
2036
2037       if (!expr_invariant_in_loop_p (loop, step)
2038           || !expr_invariant_in_loop_p (loop, lbound))
2039         return false;
2040     }
2041
2042   if (TREE_CODE (*idx) != SSA_NAME)
2043     return true;
2044
2045   iv = get_iv (dta->ivopts_data, *idx);
2046   if (!iv)
2047     return false;
2048
2049   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2050           *&x[0], which is not folded and does not trigger the
2051           ARRAY_REF path below.  */
2052   *idx = iv->base;
2053
2054   if (integer_zerop (iv->step))
2055     return true;
2056
2057   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2058     {
2059       step = array_ref_element_size (base);
2060
2061       /* We only handle addresses whose step is an integer constant.  */
2062       if (TREE_CODE (step) != INTEGER_CST)
2063         return false;
2064     }
2065   else
2066     /* The step for pointer arithmetics already is 1 byte.  */
2067     step = size_one_node;
2068
2069   iv_base = iv->base;
2070   iv_step = iv->step;
2071   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2072     use_overflow_semantics = true;
2073
2074   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2075                             sizetype, &iv_base, &iv_step, dta->stmt,
2076                             use_overflow_semantics))
2077     {
2078       /* The index might wrap.  */
2079       return false;
2080     }
2081
2082   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2083   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2084
2085   if (dta->ivopts_data->bivs_not_used_in_addr)
2086     {
2087       if (!iv->biv_p)
2088         iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2089
2090       record_biv_for_address_use (dta->ivopts_data, iv);
2091     }
2092   return true;
2093 }
2094
2095 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2096    object is passed to it in DATA.  */
2097
2098 static bool
2099 idx_record_use (tree base, tree *idx,
2100                 void *vdata)
2101 {
2102   struct ivopts_data *data = (struct ivopts_data *) vdata;
2103   find_interesting_uses_op (data, *idx);
2104   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2105     {
2106       find_interesting_uses_op (data, array_ref_element_size (base));
2107       find_interesting_uses_op (data, array_ref_low_bound (base));
2108     }
2109   return true;
2110 }
2111
2112 /* If we can prove that TOP = cst * BOT for some constant cst,
2113    store cst to MUL and return true.  Otherwise return false.
2114    The returned value is always sign-extended, regardless of the
2115    signedness of TOP and BOT.  */
2116
2117 static bool
2118 constant_multiple_of (tree top, tree bot, widest_int *mul)
2119 {
2120   tree mby;
2121   enum tree_code code;
2122   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2123   widest_int res, p0, p1;
2124
2125   STRIP_NOPS (top);
2126   STRIP_NOPS (bot);
2127
2128   if (operand_equal_p (top, bot, 0))
2129     {
2130       *mul = 1;
2131       return true;
2132     }
2133
2134   code = TREE_CODE (top);
2135   switch (code)
2136     {
2137     case MULT_EXPR:
2138       mby = TREE_OPERAND (top, 1);
2139       if (TREE_CODE (mby) != INTEGER_CST)
2140         return false;
2141
2142       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2143         return false;
2144
2145       *mul = wi::sext (res * wi::to_widest (mby), precision);
2146       return true;
2147
2148     case PLUS_EXPR:
2149     case MINUS_EXPR:
2150       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2151           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2152         return false;
2153
2154       if (code == MINUS_EXPR)
2155         p1 = -p1;
2156       *mul = wi::sext (p0 + p1, precision);
2157       return true;
2158
2159     case INTEGER_CST:
2160       if (TREE_CODE (bot) != INTEGER_CST)
2161         return false;
2162
2163       p0 = widest_int::from (top, SIGNED);
2164       p1 = widest_int::from (bot, SIGNED);
2165       if (p1 == 0)
2166         return false;
2167       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2168       return res == 0;
2169
2170     default:
2171       return false;
2172     }
2173 }
2174
2175 /* Return true if memory reference REF with step STEP may be unaligned.  */
2176
2177 static bool
2178 may_be_unaligned_p (tree ref, tree step)
2179 {
2180   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2181      thus they are not misaligned.  */
2182   if (TREE_CODE (ref) == TARGET_MEM_REF)
2183     return false;
2184
2185   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2186   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2187     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2188
2189   unsigned HOST_WIDE_INT bitpos;
2190   unsigned int ref_align;
2191   get_object_alignment_1 (ref, &ref_align, &bitpos);
2192   if (ref_align < align
2193       || (bitpos % align) != 0
2194       || (bitpos % BITS_PER_UNIT) != 0)
2195     return true;
2196
2197   unsigned int trailing_zeros = tree_ctz (step);
2198   if (trailing_zeros < HOST_BITS_PER_INT
2199       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2200     return true;
2201
2202   return false;
2203 }
2204
2205 /* Return true if EXPR may be non-addressable.   */
2206
2207 bool
2208 may_be_nonaddressable_p (tree expr)
2209 {
2210   switch (TREE_CODE (expr))
2211     {
2212     case TARGET_MEM_REF:
2213       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2214          target, thus they are always addressable.  */
2215       return false;
2216
2217     case MEM_REF:
2218       /* Likewise for MEM_REFs, modulo the storage order.  */
2219       return REF_REVERSE_STORAGE_ORDER (expr);
2220
2221     case BIT_FIELD_REF:
2222       if (REF_REVERSE_STORAGE_ORDER (expr))
2223         return true;
2224       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2225
2226     case COMPONENT_REF:
2227       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2228         return true;
2229       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2230              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2231
2232     case ARRAY_REF:
2233     case ARRAY_RANGE_REF:
2234       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2235         return true;
2236       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2237
2238     case VIEW_CONVERT_EXPR:
2239       /* This kind of view-conversions may wrap non-addressable objects
2240          and make them look addressable.  After some processing the
2241          non-addressability may be uncovered again, causing ADDR_EXPRs
2242          of inappropriate objects to be built.  */
2243       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2244           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2245         return true;
2246       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2247
2248     CASE_CONVERT:
2249       return true;
2250
2251     default:
2252       break;
2253     }
2254
2255   return false;
2256 }
2257
2258 /* Finds addresses in *OP_P inside STMT.  */
2259
2260 static void
2261 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2262                                tree *op_p)
2263 {
2264   tree base = *op_p, step = size_zero_node;
2265   struct iv *civ;
2266   struct ifs_ivopts_data ifs_ivopts_data;
2267
2268   /* Do not play with volatile memory references.  A bit too conservative,
2269      perhaps, but safe.  */
2270   if (gimple_has_volatile_ops (stmt))
2271     goto fail;
2272
2273   /* Ignore bitfields for now.  Not really something terribly complicated
2274      to handle.  TODO.  */
2275   if (TREE_CODE (base) == BIT_FIELD_REF)
2276     goto fail;
2277
2278   base = unshare_expr (base);
2279
2280   if (TREE_CODE (base) == TARGET_MEM_REF)
2281     {
2282       tree type = build_pointer_type (TREE_TYPE (base));
2283       tree astep;
2284
2285       if (TMR_BASE (base)
2286           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2287         {
2288           civ = get_iv (data, TMR_BASE (base));
2289           if (!civ)
2290             goto fail;
2291
2292           TMR_BASE (base) = civ->base;
2293           step = civ->step;
2294         }
2295       if (TMR_INDEX2 (base)
2296           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2297         {
2298           civ = get_iv (data, TMR_INDEX2 (base));
2299           if (!civ)
2300             goto fail;
2301
2302           TMR_INDEX2 (base) = civ->base;
2303           step = civ->step;
2304         }
2305       if (TMR_INDEX (base)
2306           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2307         {
2308           civ = get_iv (data, TMR_INDEX (base));
2309           if (!civ)
2310             goto fail;
2311
2312           TMR_INDEX (base) = civ->base;
2313           astep = civ->step;
2314
2315           if (astep)
2316             {
2317               if (TMR_STEP (base))
2318                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2319
2320               step = fold_build2 (PLUS_EXPR, type, step, astep);
2321             }
2322         }
2323
2324       if (integer_zerop (step))
2325         goto fail;
2326       base = tree_mem_ref_addr (type, base);
2327     }
2328   else
2329     {
2330       ifs_ivopts_data.ivopts_data = data;
2331       ifs_ivopts_data.stmt = stmt;
2332       ifs_ivopts_data.step = size_zero_node;
2333       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2334           || integer_zerop (ifs_ivopts_data.step))
2335         goto fail;
2336       step = ifs_ivopts_data.step;
2337
2338       /* Check that the base expression is addressable.  This needs
2339          to be done after substituting bases of IVs into it.  */
2340       if (may_be_nonaddressable_p (base))
2341         goto fail;
2342
2343       /* Moreover, on strict alignment platforms, check that it is
2344          sufficiently aligned.  */
2345       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2346         goto fail;
2347
2348       base = build_fold_addr_expr (base);
2349
2350       /* Substituting bases of IVs into the base expression might
2351          have caused folding opportunities.  */
2352       if (TREE_CODE (base) == ADDR_EXPR)
2353         {
2354           tree *ref = &TREE_OPERAND (base, 0);
2355           while (handled_component_p (*ref))
2356             ref = &TREE_OPERAND (*ref, 0);
2357           if (TREE_CODE (*ref) == MEM_REF)
2358             {
2359               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2360                                       TREE_OPERAND (*ref, 0),
2361                                       TREE_OPERAND (*ref, 1));
2362               if (tem)
2363                 *ref = tem;
2364             }
2365         }
2366     }
2367
2368   civ = alloc_iv (data, base, step);
2369   /* Fail if base object of this memory reference is unknown.  */
2370   if (civ->base_object == NULL_TREE)
2371     goto fail;
2372
2373   record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2374   return;
2375
2376 fail:
2377   for_each_index (op_p, idx_record_use, data);
2378 }
2379
2380 /* Finds and records invariants used in STMT.  */
2381
2382 static void
2383 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2384 {
2385   ssa_op_iter iter;
2386   use_operand_p use_p;
2387   tree op;
2388
2389   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2390     {
2391       op = USE_FROM_PTR (use_p);
2392       record_invariant (data, op, false);
2393     }
2394 }
2395
2396 /* Finds interesting uses of induction variables in the statement STMT.  */
2397
2398 static void
2399 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2400 {
2401   struct iv *iv;
2402   tree op, *lhs, *rhs;
2403   ssa_op_iter iter;
2404   use_operand_p use_p;
2405   enum tree_code code;
2406
2407   find_invariants_stmt (data, stmt);
2408
2409   if (gimple_code (stmt) == GIMPLE_COND)
2410     {
2411       find_interesting_uses_cond (data, stmt);
2412       return;
2413     }
2414
2415   if (is_gimple_assign (stmt))
2416     {
2417       lhs = gimple_assign_lhs_ptr (stmt);
2418       rhs = gimple_assign_rhs1_ptr (stmt);
2419
2420       if (TREE_CODE (*lhs) == SSA_NAME)
2421         {
2422           /* If the statement defines an induction variable, the uses are not
2423              interesting by themselves.  */
2424
2425           iv = get_iv (data, *lhs);
2426
2427           if (iv && !integer_zerop (iv->step))
2428             return;
2429         }
2430
2431       code = gimple_assign_rhs_code (stmt);
2432       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2433           && (REFERENCE_CLASS_P (*rhs)
2434               || is_gimple_val (*rhs)))
2435         {
2436           if (REFERENCE_CLASS_P (*rhs))
2437             find_interesting_uses_address (data, stmt, rhs);
2438           else
2439             find_interesting_uses_op (data, *rhs);
2440
2441           if (REFERENCE_CLASS_P (*lhs))
2442             find_interesting_uses_address (data, stmt, lhs);
2443           return;
2444         }
2445       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2446         {
2447           find_interesting_uses_cond (data, stmt);
2448           return;
2449         }
2450
2451       /* TODO -- we should also handle address uses of type
2452
2453          memory = call (whatever);
2454
2455          and
2456
2457          call (memory).  */
2458     }
2459
2460   if (gimple_code (stmt) == GIMPLE_PHI
2461       && gimple_bb (stmt) == data->current_loop->header)
2462     {
2463       iv = get_iv (data, PHI_RESULT (stmt));
2464
2465       if (iv && !integer_zerop (iv->step))
2466         return;
2467     }
2468
2469   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2470     {
2471       op = USE_FROM_PTR (use_p);
2472
2473       if (TREE_CODE (op) != SSA_NAME)
2474         continue;
2475
2476       iv = get_iv (data, op);
2477       if (!iv)
2478         continue;
2479
2480       find_interesting_uses_op (data, op);
2481     }
2482 }
2483
2484 /* Finds interesting uses of induction variables outside of loops
2485    on loop exit edge EXIT.  */
2486
2487 static void
2488 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2489 {
2490   gphi *phi;
2491   gphi_iterator psi;
2492   tree def;
2493
2494   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2495     {
2496       phi = psi.phi ();
2497       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2498       if (!virtual_operand_p (def))
2499         find_interesting_uses_op (data, def);
2500     }
2501 }
2502
2503 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2504    mode for memory reference represented by USE.  */
2505
2506 static GTY (()) vec<rtx, va_gc> *addr_list;
2507
2508 static bool
2509 addr_offset_valid_p (struct iv_use *use, HOST_WIDE_INT offset)
2510 {
2511   rtx reg, addr;
2512   unsigned list_index;
2513   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2514   machine_mode addr_mode, mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2515
2516   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2517   if (list_index >= vec_safe_length (addr_list))
2518     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2519
2520   addr = (*addr_list)[list_index];
2521   if (!addr)
2522     {
2523       addr_mode = targetm.addr_space.address_mode (as);
2524       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2525       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2526       (*addr_list)[list_index] = addr;
2527     }
2528   else
2529     addr_mode = GET_MODE (addr);
2530
2531   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2532   return (memory_address_addr_space_p (mem_mode, addr, as));
2533 }
2534
2535 /* Comparison function to sort group in ascending order of addr_offset.  */
2536
2537 static int
2538 group_compare_offset (const void *a, const void *b)
2539 {
2540   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2541   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2542
2543   if ((*u1)->addr_offset != (*u2)->addr_offset)
2544     return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2545   else
2546     return 0;
2547 }
2548
2549 /* Check if small groups should be split.  Return true if no group
2550    contains more than two uses with distinct addr_offsets.  Return
2551    false otherwise.  We want to split such groups because:
2552
2553      1) Small groups don't have much benefit and may interfer with
2554         general candidate selection.
2555      2) Size for problem with only small groups is usually small and
2556         general algorithm can handle it well.
2557
2558    TODO -- Above claim may not hold when we want to merge memory
2559    accesses with conseuctive addresses.  */
2560
2561 static bool
2562 split_small_address_groups_p (struct ivopts_data *data)
2563 {
2564   unsigned int i, j, distinct = 1;
2565   struct iv_use *pre;
2566   struct iv_group *group;
2567
2568   for (i = 0; i < data->vgroups.length (); i++)
2569     {
2570       group = data->vgroups[i];
2571       if (group->vuses.length () == 1)
2572         continue;
2573
2574       gcc_assert (group->type == USE_ADDRESS);
2575       if (group->vuses.length () == 2)
2576         {
2577           if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2578             std::swap (group->vuses[0], group->vuses[1]);
2579         }
2580       else
2581         group->vuses.qsort (group_compare_offset);
2582
2583       if (distinct > 2)
2584         continue;
2585
2586       distinct = 1;
2587       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2588         {
2589           if (group->vuses[j]->addr_offset != pre->addr_offset)
2590             {
2591               pre = group->vuses[j];
2592               distinct++;
2593             }
2594
2595           if (distinct > 2)
2596             break;
2597         }
2598     }
2599
2600   return (distinct <= 2);
2601 }
2602
2603 /* For each group of address type uses, this function further groups
2604    these uses according to the maximum offset supported by target's
2605    [base + offset] addressing mode.  */
2606
2607 static void
2608 split_address_groups (struct ivopts_data *data)
2609 {
2610   unsigned int i, j;
2611   /* Always split group.  */
2612   bool split_p = split_small_address_groups_p (data);
2613
2614   for (i = 0; i < data->vgroups.length (); i++)
2615     {
2616       struct iv_group *new_group = NULL;
2617       struct iv_group *group = data->vgroups[i];
2618       struct iv_use *use = group->vuses[0];
2619
2620       use->id = 0;
2621       use->group_id = group->id;
2622       if (group->vuses.length () == 1)
2623         continue;
2624
2625       gcc_assert (group->type == USE_ADDRESS);
2626
2627       for (j = 1; j < group->vuses.length ();)
2628         {
2629           struct iv_use *next = group->vuses[j];
2630           HOST_WIDE_INT offset = next->addr_offset - use->addr_offset;
2631
2632           /* Split group if aksed to, or the offset against the first
2633              use can't fit in offset part of addressing mode.  IV uses
2634              having the same offset are still kept in one group.  */
2635           if (offset != 0 &&
2636               (split_p || !addr_offset_valid_p (use, offset)))
2637             {
2638               if (!new_group)
2639                 new_group = record_group (data, group->type);
2640               group->vuses.ordered_remove (j);
2641               new_group->vuses.safe_push (next);
2642               continue;
2643             }
2644
2645           next->id = j;
2646           next->group_id = group->id;
2647           j++;
2648         }
2649     }
2650 }
2651
2652 /* Finds uses of the induction variables that are interesting.  */
2653
2654 static void
2655 find_interesting_uses (struct ivopts_data *data)
2656 {
2657   basic_block bb;
2658   gimple_stmt_iterator bsi;
2659   basic_block *body = get_loop_body (data->current_loop);
2660   unsigned i;
2661   edge e;
2662
2663   for (i = 0; i < data->current_loop->num_nodes; i++)
2664     {
2665       edge_iterator ei;
2666       bb = body[i];
2667
2668       FOR_EACH_EDGE (e, ei, bb->succs)
2669         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2670             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2671           find_interesting_uses_outside (data, e);
2672
2673       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2674         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2675       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2676         if (!is_gimple_debug (gsi_stmt (bsi)))
2677           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2678     }
2679   free (body);
2680
2681   split_address_groups (data);
2682
2683   if (dump_file && (dump_flags & TDF_DETAILS))
2684     {
2685       fprintf (dump_file, "\n<IV Groups>:\n");
2686       dump_groups (dump_file, data);
2687       fprintf (dump_file, "\n");
2688     }
2689 }
2690
2691 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2692    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2693    we are at the top-level of the processed address.  */
2694
2695 static tree
2696 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2697                 HOST_WIDE_INT *offset)
2698 {
2699   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2700   enum tree_code code;
2701   tree type, orig_type = TREE_TYPE (expr);
2702   HOST_WIDE_INT off0, off1, st;
2703   tree orig_expr = expr;
2704
2705   STRIP_NOPS (expr);
2706
2707   type = TREE_TYPE (expr);
2708   code = TREE_CODE (expr);
2709   *offset = 0;
2710
2711   switch (code)
2712     {
2713     case INTEGER_CST:
2714       if (!cst_and_fits_in_hwi (expr)
2715           || integer_zerop (expr))
2716         return orig_expr;
2717
2718       *offset = int_cst_value (expr);
2719       return build_int_cst (orig_type, 0);
2720
2721     case POINTER_PLUS_EXPR:
2722     case PLUS_EXPR:
2723     case MINUS_EXPR:
2724       op0 = TREE_OPERAND (expr, 0);
2725       op1 = TREE_OPERAND (expr, 1);
2726
2727       op0 = strip_offset_1 (op0, false, false, &off0);
2728       op1 = strip_offset_1 (op1, false, false, &off1);
2729
2730       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2731       if (op0 == TREE_OPERAND (expr, 0)
2732           && op1 == TREE_OPERAND (expr, 1))
2733         return orig_expr;
2734
2735       if (integer_zerop (op1))
2736         expr = op0;
2737       else if (integer_zerop (op0))
2738         {
2739           if (code == MINUS_EXPR)
2740             expr = fold_build1 (NEGATE_EXPR, type, op1);
2741           else
2742             expr = op1;
2743         }
2744       else
2745         expr = fold_build2 (code, type, op0, op1);
2746
2747       return fold_convert (orig_type, expr);
2748
2749     case MULT_EXPR:
2750       op1 = TREE_OPERAND (expr, 1);
2751       if (!cst_and_fits_in_hwi (op1))
2752         return orig_expr;
2753
2754       op0 = TREE_OPERAND (expr, 0);
2755       op0 = strip_offset_1 (op0, false, false, &off0);
2756       if (op0 == TREE_OPERAND (expr, 0))
2757         return orig_expr;
2758
2759       *offset = off0 * int_cst_value (op1);
2760       if (integer_zerop (op0))
2761         expr = op0;
2762       else
2763         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2764
2765       return fold_convert (orig_type, expr);
2766
2767     case ARRAY_REF:
2768     case ARRAY_RANGE_REF:
2769       if (!inside_addr)
2770         return orig_expr;
2771
2772       step = array_ref_element_size (expr);
2773       if (!cst_and_fits_in_hwi (step))
2774         break;
2775
2776       st = int_cst_value (step);
2777       op1 = TREE_OPERAND (expr, 1);
2778       op1 = strip_offset_1 (op1, false, false, &off1);
2779       *offset = off1 * st;
2780
2781       if (top_compref
2782           && integer_zerop (op1))
2783         {
2784           /* Strip the component reference completely.  */
2785           op0 = TREE_OPERAND (expr, 0);
2786           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2787           *offset += off0;
2788           return op0;
2789         }
2790       break;
2791
2792     case COMPONENT_REF:
2793       {
2794         tree field;
2795
2796         if (!inside_addr)
2797           return orig_expr;
2798
2799         tmp = component_ref_field_offset (expr);
2800         field = TREE_OPERAND (expr, 1);
2801         if (top_compref
2802             && cst_and_fits_in_hwi (tmp)
2803             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2804           {
2805             HOST_WIDE_INT boffset, abs_off;
2806
2807             /* Strip the component reference completely.  */
2808             op0 = TREE_OPERAND (expr, 0);
2809             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2810             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2811             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2812             if (boffset < 0)
2813               abs_off = -abs_off;
2814
2815             *offset = off0 + int_cst_value (tmp) + abs_off;
2816             return op0;
2817           }
2818       }
2819       break;
2820
2821     case ADDR_EXPR:
2822       op0 = TREE_OPERAND (expr, 0);
2823       op0 = strip_offset_1 (op0, true, true, &off0);
2824       *offset += off0;
2825
2826       if (op0 == TREE_OPERAND (expr, 0))
2827         return orig_expr;
2828
2829       expr = build_fold_addr_expr (op0);
2830       return fold_convert (orig_type, expr);
2831
2832     case MEM_REF:
2833       /* ???  Offset operand?  */
2834       inside_addr = false;
2835       break;
2836
2837     default:
2838       return orig_expr;
2839     }
2840
2841   /* Default handling of expressions for that we want to recurse into
2842      the first operand.  */
2843   op0 = TREE_OPERAND (expr, 0);
2844   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2845   *offset += off0;
2846
2847   if (op0 == TREE_OPERAND (expr, 0)
2848       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2849     return orig_expr;
2850
2851   expr = copy_node (expr);
2852   TREE_OPERAND (expr, 0) = op0;
2853   if (op1)
2854     TREE_OPERAND (expr, 1) = op1;
2855
2856   /* Inside address, we might strip the top level component references,
2857      thus changing type of the expression.  Handling of ADDR_EXPR
2858      will fix that.  */
2859   expr = fold_convert (orig_type, expr);
2860
2861   return expr;
2862 }
2863
2864 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2865
2866 static tree
2867 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2868 {
2869   HOST_WIDE_INT off;
2870   tree core = strip_offset_1 (expr, false, false, &off);
2871   *offset = off;
2872   return core;
2873 }
2874
2875 /* Returns variant of TYPE that can be used as base for different uses.
2876    We return unsigned type with the same precision, which avoids problems
2877    with overflows.  */
2878
2879 static tree
2880 generic_type_for (tree type)
2881 {
2882   if (POINTER_TYPE_P (type))
2883     return unsigned_type_for (type);
2884
2885   if (TYPE_UNSIGNED (type))
2886     return type;
2887
2888   return unsigned_type_for (type);
2889 }
2890
2891 /* Private data for walk_tree.  */
2892
2893 struct walk_tree_data
2894 {
2895   bitmap *inv_vars;
2896   struct ivopts_data *idata;
2897 };
2898
2899 /* Callback function for walk_tree, it records invariants and symbol
2900    reference in *EXPR_P.  DATA is the structure storing result info.  */
2901
2902 static tree
2903 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2904 {
2905   tree op = *expr_p;
2906   struct version_info *info;
2907   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2908
2909   if (TREE_CODE (op) != SSA_NAME)
2910     return NULL_TREE;
2911
2912   info = name_info (wdata->idata, op);
2913   /* Because we expand simple operations when finding IVs, loop invariant
2914      variable that isn't referred by the original loop could be used now.
2915      Record such invariant variables here.  */
2916   if (!info->iv)
2917     {
2918       struct ivopts_data *idata = wdata->idata;
2919       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2920
2921       if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2922         {
2923           set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
2924           record_invariant (idata, op, false);
2925         }
2926     }
2927   if (!info->inv_id || info->has_nonlin_use)
2928     return NULL_TREE;
2929
2930   if (!*wdata->inv_vars)
2931     *wdata->inv_vars = BITMAP_ALLOC (NULL);
2932   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
2933
2934   return NULL_TREE;
2935 }
2936
2937 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap to that we should
2938    store it.  */
2939
2940 static inline void
2941 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
2942 {
2943   struct walk_tree_data wdata;
2944
2945   if (!inv_vars)
2946     return;
2947
2948   wdata.idata = data;
2949   wdata.inv_vars = inv_vars;
2950   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
2951 }
2952
2953 /* Get entry from invariant expr hash table for INV_EXPR.  New entry
2954    will be recorded if it doesn't exist yet.  Given below two exprs:
2955      inv_expr + cst1, inv_expr + cst2
2956    It's hard to make decision whether constant part should be stripped
2957    or not.  We choose to not strip based on below facts:
2958      1) We need to count ADD cost for constant part if it's stripped,
2959         which is't always trivial where this functions is called.
2960      2) Stripping constant away may be conflict with following loop
2961         invariant hoisting pass.
2962      3) Not stripping constant away results in more invariant exprs,
2963         which usually leads to decision preferring lower reg pressure.  */
2964
2965 static iv_inv_expr_ent *
2966 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
2967 {
2968   STRIP_NOPS (inv_expr);
2969
2970   if (TREE_CODE (inv_expr) == INTEGER_CST || TREE_CODE (inv_expr) == SSA_NAME)
2971     return NULL;
2972
2973   /* Don't strip constant part away as we used to.  */
2974
2975   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
2976   struct iv_inv_expr_ent ent;
2977   ent.expr = inv_expr;
2978   ent.hash = iterative_hash_expr (inv_expr, 0);
2979   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
2980
2981   if (!*slot)
2982     {
2983       *slot = XNEW (struct iv_inv_expr_ent);
2984       (*slot)->expr = inv_expr;
2985       (*slot)->hash = ent.hash;
2986       (*slot)->id = ++data->max_inv_expr_id;
2987     }
2988
2989   return *slot;
2990 }
2991
2992 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2993    position to POS.  If USE is not NULL, the candidate is set as related to
2994    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2995    replacement of the final value of the iv by a direct computation.  */
2996
2997 static struct iv_cand *
2998 add_candidate_1 (struct ivopts_data *data,
2999                  tree base, tree step, bool important, enum iv_position pos,
3000                  struct iv_use *use, gimple *incremented_at,
3001                  struct iv *orig_iv = NULL)
3002 {
3003   unsigned i;
3004   struct iv_cand *cand = NULL;
3005   tree type, orig_type;
3006
3007   gcc_assert (base && step);
3008
3009   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3010      live, but the ivopts code may replace a real pointer with one
3011      pointing before or after the memory block that is then adjusted
3012      into the memory block during the loop.  FIXME: It would likely be
3013      better to actually force the pointer live and still use ivopts;
3014      for example, it would be enough to write the pointer into memory
3015      and keep it there until after the loop.  */
3016   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3017     return NULL;
3018
3019   /* For non-original variables, make sure their values are computed in a type
3020      that does not invoke undefined behavior on overflows (since in general,
3021      we cannot prove that these induction variables are non-wrapping).  */
3022   if (pos != IP_ORIGINAL)
3023     {
3024       orig_type = TREE_TYPE (base);
3025       type = generic_type_for (orig_type);
3026       if (type != orig_type)
3027         {
3028           base = fold_convert (type, base);
3029           step = fold_convert (type, step);
3030         }
3031     }
3032
3033   for (i = 0; i < data->vcands.length (); i++)
3034     {
3035       cand = data->vcands[i];
3036
3037       if (cand->pos != pos)
3038         continue;
3039
3040       if (cand->incremented_at != incremented_at
3041           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3042               && cand->ainc_use != use))
3043         continue;
3044
3045       if (operand_equal_p (base, cand->iv->base, 0)
3046           && operand_equal_p (step, cand->iv->step, 0)
3047           && (TYPE_PRECISION (TREE_TYPE (base))
3048               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3049         break;
3050     }
3051
3052   if (i == data->vcands.length ())
3053     {
3054       cand = XCNEW (struct iv_cand);
3055       cand->id = i;
3056       cand->iv = alloc_iv (data, base, step);
3057       cand->pos = pos;
3058       if (pos != IP_ORIGINAL)
3059         {
3060           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3061           cand->var_after = cand->var_before;
3062         }
3063       cand->important = important;
3064       cand->incremented_at = incremented_at;
3065       data->vcands.safe_push (cand);
3066
3067       if (TREE_CODE (step) != INTEGER_CST)
3068         {
3069           find_inv_vars (data, &step, &cand->inv_vars);
3070
3071           iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3072           /* Share bitmap between inv_vars and inv_exprs for cand.  */
3073           if (inv_expr != NULL)
3074             {
3075               cand->inv_exprs = cand->inv_vars;
3076               cand->inv_vars = NULL;
3077               if (cand->inv_exprs)
3078                 bitmap_clear (cand->inv_exprs);
3079               else
3080                 cand->inv_exprs = BITMAP_ALLOC (NULL);
3081
3082               bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3083             }
3084         }
3085
3086       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3087         cand->ainc_use = use;
3088       else
3089         cand->ainc_use = NULL;
3090
3091       cand->orig_iv = orig_iv;
3092       if (dump_file && (dump_flags & TDF_DETAILS))
3093         dump_cand (dump_file, cand);
3094     }
3095
3096   cand->important |= important;
3097
3098   /* Relate candidate to the group for which it is added.  */
3099   if (use)
3100     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3101
3102   return cand;
3103 }
3104
3105 /* Returns true if incrementing the induction variable at the end of the LOOP
3106    is allowed.
3107
3108    The purpose is to avoid splitting latch edge with a biv increment, thus
3109    creating a jump, possibly confusing other optimization passes and leaving
3110    less freedom to scheduler.  So we allow IP_END only if IP_NORMAL is not
3111    available (so we do not have a better alternative), or if the latch edge
3112    is already nonempty.  */
3113
3114 static bool
3115 allow_ip_end_pos_p (struct loop *loop)
3116 {
3117   if (!ip_normal_pos (loop))
3118     return true;
3119
3120   if (!empty_block_p (ip_end_pos (loop)))
3121     return true;
3122
3123   return false;
3124 }
3125
3126 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3127    Important field is set to IMPORTANT.  */
3128
3129 static void
3130 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3131                         bool important, struct iv_use *use)
3132 {
3133   basic_block use_bb = gimple_bb (use->stmt);
3134   machine_mode mem_mode;
3135   unsigned HOST_WIDE_INT cstepi;
3136
3137   /* If we insert the increment in any position other than the standard
3138      ones, we must ensure that it is incremented once per iteration.
3139      It must not be in an inner nested loop, or one side of an if
3140      statement.  */
3141   if (use_bb->loop_father != data->current_loop
3142       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3143       || stmt_could_throw_p (use->stmt)
3144       || !cst_and_fits_in_hwi (step))
3145     return;
3146
3147   cstepi = int_cst_value (step);
3148
3149   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
3150   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3151         || USE_STORE_PRE_INCREMENT (mem_mode))
3152        && GET_MODE_SIZE (mem_mode) == cstepi)
3153       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3154            || USE_STORE_PRE_DECREMENT (mem_mode))
3155           && GET_MODE_SIZE (mem_mode) == -cstepi))
3156     {
3157       enum tree_code code = MINUS_EXPR;
3158       tree new_base;
3159       tree new_step = step;
3160
3161       if (POINTER_TYPE_P (TREE_TYPE (base)))
3162         {
3163           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3164           code = POINTER_PLUS_EXPR;
3165         }
3166       else
3167         new_step = fold_convert (TREE_TYPE (base), new_step);
3168       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3169       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3170                        use->stmt);
3171     }
3172   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3173         || USE_STORE_POST_INCREMENT (mem_mode))
3174        && GET_MODE_SIZE (mem_mode) == cstepi)
3175       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3176            || USE_STORE_POST_DECREMENT (mem_mode))
3177           && GET_MODE_SIZE (mem_mode) == -cstepi))
3178     {
3179       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3180                        use->stmt);
3181     }
3182 }
3183
3184 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3185    position to POS.  If USE is not NULL, the candidate is set as related to
3186    it.  The candidate computation is scheduled before exit condition and at
3187    the end of loop.  */
3188
3189 static void
3190 add_candidate (struct ivopts_data *data,
3191                tree base, tree step, bool important, struct iv_use *use,
3192                struct iv *orig_iv = NULL)
3193 {
3194   if (ip_normal_pos (data->current_loop))
3195     add_candidate_1 (data, base, step, important,
3196                      IP_NORMAL, use, NULL, orig_iv);
3197   if (ip_end_pos (data->current_loop)
3198       && allow_ip_end_pos_p (data->current_loop))
3199     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3200 }
3201
3202 /* Adds standard iv candidates.  */
3203
3204 static void
3205 add_standard_iv_candidates (struct ivopts_data *data)
3206 {
3207   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3208
3209   /* The same for a double-integer type if it is still fast enough.  */
3210   if (TYPE_PRECISION
3211         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3212       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3213     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3214                    build_int_cst (long_integer_type_node, 1), true, NULL);
3215
3216   /* The same for a double-integer type if it is still fast enough.  */
3217   if (TYPE_PRECISION
3218         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3219       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3220     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3221                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
3222 }
3223
3224
3225 /* Adds candidates bases on the old induction variable IV.  */
3226
3227 static void
3228 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3229 {
3230   gimple *phi;
3231   tree def;
3232   struct iv_cand *cand;
3233
3234   /* Check if this biv is used in address type use.  */
3235   if (iv->no_overflow  && iv->have_address_use
3236       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3237       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3238     {
3239       tree base = fold_convert (sizetype, iv->base);
3240       tree step = fold_convert (sizetype, iv->step);
3241
3242       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3243       add_candidate (data, base, step, true, NULL, iv);
3244       /* Add iv cand of the original type only if it has nonlinear use.  */
3245       if (iv->nonlin_use)
3246         add_candidate (data, iv->base, iv->step, true, NULL);
3247     }
3248   else
3249     add_candidate (data, iv->base, iv->step, true, NULL);
3250
3251   /* The same, but with initial value zero.  */
3252   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3253     add_candidate (data, size_int (0), iv->step, true, NULL);
3254   else
3255     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3256                    iv->step, true, NULL);
3257
3258   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3259   if (gimple_code (phi) == GIMPLE_PHI)
3260     {
3261       /* Additionally record the possibility of leaving the original iv
3262          untouched.  */
3263       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3264       /* Don't add candidate if it's from another PHI node because
3265          it's an affine iv appearing in the form of PEELED_CHREC.  */
3266       phi = SSA_NAME_DEF_STMT (def);
3267       if (gimple_code (phi) != GIMPLE_PHI)
3268         {
3269           cand = add_candidate_1 (data,
3270                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
3271                                   SSA_NAME_DEF_STMT (def));
3272           if (cand)
3273             {
3274               cand->var_before = iv->ssa_name;
3275               cand->var_after = def;
3276             }
3277         }
3278       else
3279         gcc_assert (gimple_bb (phi) == data->current_loop->header);
3280     }
3281 }
3282
3283 /* Adds candidates based on the old induction variables.  */
3284
3285 static void
3286 add_iv_candidate_for_bivs (struct ivopts_data *data)
3287 {
3288   unsigned i;
3289   struct iv *iv;
3290   bitmap_iterator bi;
3291
3292   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3293     {
3294       iv = ver_info (data, i)->iv;
3295       if (iv && iv->biv_p && !integer_zerop (iv->step))
3296         add_iv_candidate_for_biv (data, iv);
3297     }
3298 }
3299
3300 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3301
3302 static void
3303 record_common_cand (struct ivopts_data *data, tree base,
3304                     tree step, struct iv_use *use)
3305 {
3306   struct iv_common_cand ent;
3307   struct iv_common_cand **slot;
3308
3309   ent.base = base;
3310   ent.step = step;
3311   ent.hash = iterative_hash_expr (base, 0);
3312   ent.hash = iterative_hash_expr (step, ent.hash);
3313
3314   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3315   if (*slot == NULL)
3316     {
3317       *slot = new iv_common_cand ();
3318       (*slot)->base = base;
3319       (*slot)->step = step;
3320       (*slot)->uses.create (8);
3321       (*slot)->hash = ent.hash;
3322       data->iv_common_cands.safe_push ((*slot));
3323     }
3324
3325   gcc_assert (use != NULL);
3326   (*slot)->uses.safe_push (use);
3327   return;
3328 }
3329
3330 /* Comparison function used to sort common candidates.  */
3331
3332 static int
3333 common_cand_cmp (const void *p1, const void *p2)
3334 {
3335   unsigned n1, n2;
3336   const struct iv_common_cand *const *const ccand1
3337     = (const struct iv_common_cand *const *)p1;
3338   const struct iv_common_cand *const *const ccand2
3339     = (const struct iv_common_cand *const *)p2;
3340
3341   n1 = (*ccand1)->uses.length ();
3342   n2 = (*ccand2)->uses.length ();
3343   return n2 - n1;
3344 }
3345
3346 /* Adds IV candidates based on common candidated recorded.  */
3347
3348 static void
3349 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3350 {
3351   unsigned i, j;
3352   struct iv_cand *cand_1, *cand_2;
3353
3354   data->iv_common_cands.qsort (common_cand_cmp);
3355   for (i = 0; i < data->iv_common_cands.length (); i++)
3356     {
3357       struct iv_common_cand *ptr = data->iv_common_cands[i];
3358
3359       /* Only add IV candidate if it's derived from multiple uses.  */
3360       if (ptr->uses.length () <= 1)
3361         break;
3362
3363       cand_1 = NULL;
3364       cand_2 = NULL;
3365       if (ip_normal_pos (data->current_loop))
3366         cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3367                                   false, IP_NORMAL, NULL, NULL);
3368
3369       if (ip_end_pos (data->current_loop)
3370           && allow_ip_end_pos_p (data->current_loop))
3371         cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3372                                   false, IP_END, NULL, NULL);
3373
3374       /* Bind deriving uses and the new candidates.  */
3375       for (j = 0; j < ptr->uses.length (); j++)
3376         {
3377           struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3378           if (cand_1)
3379             bitmap_set_bit (group->related_cands, cand_1->id);
3380           if (cand_2)
3381             bitmap_set_bit (group->related_cands, cand_2->id);
3382         }
3383     }
3384
3385   /* Release data since it is useless from this point.  */
3386   data->iv_common_cand_tab->empty ();
3387   data->iv_common_cands.truncate (0);
3388 }
3389
3390 /* Adds candidates based on the value of USE's iv.  */
3391
3392 static void
3393 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3394 {
3395   unsigned HOST_WIDE_INT offset;
3396   tree base;
3397   tree basetype;
3398   struct iv *iv = use->iv;
3399
3400   add_candidate (data, iv->base, iv->step, false, use);
3401
3402   /* Record common candidate for use in case it can be shared by others.  */
3403   record_common_cand (data, iv->base, iv->step, use);
3404
3405   /* Record common candidate with initial value zero.  */
3406   basetype = TREE_TYPE (iv->base);
3407   if (POINTER_TYPE_P (basetype))
3408     basetype = sizetype;
3409   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3410
3411   /* Record common candidate with constant offset stripped in base.
3412      Like the use itself, we also add candidate directly for it.  */
3413   base = strip_offset (iv->base, &offset);
3414   if (offset || base != iv->base)
3415     {
3416       record_common_cand (data, base, iv->step, use);
3417       add_candidate (data, base, iv->step, false, use);
3418     }
3419
3420   /* Record common candidate with base_object removed in base.  */
3421   base = iv->base;
3422   STRIP_NOPS (base);
3423   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3424     {
3425       tree step = iv->step;
3426
3427       STRIP_NOPS (step);
3428       base = TREE_OPERAND (base, 1);
3429       step = fold_convert (sizetype, step);
3430       record_common_cand (data, base, step, use);
3431       /* Also record common candidate with offset stripped.  */
3432       base = strip_offset (base, &offset);
3433       if (offset)
3434         record_common_cand (data, base, step, use);
3435     }
3436
3437   /* At last, add auto-incremental candidates.  Make such variables
3438      important since other iv uses with same base object may be based
3439      on it.  */
3440   if (use != NULL && use->type == USE_ADDRESS)
3441     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3442 }
3443
3444 /* Adds candidates based on the uses.  */
3445
3446 static void
3447 add_iv_candidate_for_groups (struct ivopts_data *data)
3448 {
3449   unsigned i;
3450
3451   /* Only add candidate for the first use in group.  */
3452   for (i = 0; i < data->vgroups.length (); i++)
3453     {
3454       struct iv_group *group = data->vgroups[i];
3455
3456       gcc_assert (group->vuses[0] != NULL);
3457       add_iv_candidate_for_use (data, group->vuses[0]);
3458     }
3459   add_iv_candidate_derived_from_uses (data);
3460 }
3461
3462 /* Record important candidates and add them to related_cands bitmaps.  */
3463
3464 static void
3465 record_important_candidates (struct ivopts_data *data)
3466 {
3467   unsigned i;
3468   struct iv_group *group;
3469
3470   for (i = 0; i < data->vcands.length (); i++)
3471     {
3472       struct iv_cand *cand = data->vcands[i];
3473
3474       if (cand->important)
3475         bitmap_set_bit (data->important_candidates, i);
3476     }
3477
3478   data->consider_all_candidates = (data->vcands.length ()
3479                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
3480
3481   /* Add important candidates to groups' related_cands bitmaps.  */
3482   for (i = 0; i < data->vgroups.length (); i++)
3483     {
3484       group = data->vgroups[i];
3485       bitmap_ior_into (group->related_cands, data->important_candidates);
3486     }
3487 }
3488
3489 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3490    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3491    we allocate a simple list to every use.  */
3492
3493 static void
3494 alloc_use_cost_map (struct ivopts_data *data)
3495 {
3496   unsigned i, size, s;
3497
3498   for (i = 0; i < data->vgroups.length (); i++)
3499     {
3500       struct iv_group *group = data->vgroups[i];
3501
3502       if (data->consider_all_candidates)
3503         size = data->vcands.length ();
3504       else
3505         {
3506           s = bitmap_count_bits (group->related_cands);
3507
3508           /* Round up to the power of two, so that moduling by it is fast.  */
3509           size = s ? (1 << ceil_log2 (s)) : 1;
3510         }
3511
3512       group->n_map_members = size;
3513       group->cost_map = XCNEWVEC (struct cost_pair, size);
3514     }
3515 }
3516
3517 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3518    on invariants INV_VARS and that the value used in expressing it is
3519    VALUE, and in case of iv elimination the comparison operator is COMP.  */
3520
3521 static void
3522 set_group_iv_cost (struct ivopts_data *data,
3523                    struct iv_group *group, struct iv_cand *cand,
3524                    comp_cost cost, bitmap inv_vars, tree value,
3525                    enum tree_code comp, bitmap inv_exprs)
3526 {
3527   unsigned i, s;
3528
3529   if (cost.infinite_cost_p ())
3530     {
3531       BITMAP_FREE (inv_vars);
3532       BITMAP_FREE (inv_exprs);
3533       return;
3534     }
3535
3536   if (data->consider_all_candidates)
3537     {
3538       group->cost_map[cand->id].cand = cand;
3539       group->cost_map[cand->id].cost = cost;
3540       group->cost_map[cand->id].inv_vars = inv_vars;
3541       group->cost_map[cand->id].inv_exprs = inv_exprs;
3542       group->cost_map[cand->id].value = value;
3543       group->cost_map[cand->id].comp = comp;
3544       return;
3545     }
3546
3547   /* n_map_members is a power of two, so this computes modulo.  */
3548   s = cand->id & (group->n_map_members - 1);
3549   for (i = s; i < group->n_map_members; i++)
3550     if (!group->cost_map[i].cand)
3551       goto found;
3552   for (i = 0; i < s; i++)
3553     if (!group->cost_map[i].cand)
3554       goto found;
3555
3556   gcc_unreachable ();
3557
3558 found:
3559   group->cost_map[i].cand = cand;
3560   group->cost_map[i].cost = cost;
3561   group->cost_map[i].inv_vars = inv_vars;
3562   group->cost_map[i].inv_exprs = inv_exprs;
3563   group->cost_map[i].value = value;
3564   group->cost_map[i].comp = comp;
3565 }
3566
3567 /* Gets cost of (GROUP, CAND) pair.  */
3568
3569 static struct cost_pair *
3570 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3571                    struct iv_cand *cand)
3572 {
3573   unsigned i, s;
3574   struct cost_pair *ret;
3575
3576   if (!cand)
3577     return NULL;
3578
3579   if (data->consider_all_candidates)
3580     {
3581       ret = group->cost_map + cand->id;
3582       if (!ret->cand)
3583         return NULL;
3584
3585       return ret;
3586     }
3587
3588   /* n_map_members is a power of two, so this computes modulo.  */
3589   s = cand->id & (group->n_map_members - 1);
3590   for (i = s; i < group->n_map_members; i++)
3591     if (group->cost_map[i].cand == cand)
3592       return group->cost_map + i;
3593     else if (group->cost_map[i].cand == NULL)
3594       return NULL;
3595   for (i = 0; i < s; i++)
3596     if (group->cost_map[i].cand == cand)
3597       return group->cost_map + i;
3598     else if (group->cost_map[i].cand == NULL)
3599       return NULL;
3600
3601   return NULL;
3602 }
3603
3604 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3605 static rtx
3606 produce_memory_decl_rtl (tree obj, int *regno)
3607 {
3608   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3609   machine_mode address_mode = targetm.addr_space.address_mode (as);
3610   rtx x;
3611
3612   gcc_assert (obj);
3613   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3614     {
3615       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3616       x = gen_rtx_SYMBOL_REF (address_mode, name);
3617       SET_SYMBOL_REF_DECL (x, obj);
3618       x = gen_rtx_MEM (DECL_MODE (obj), x);
3619       set_mem_addr_space (x, as);
3620       targetm.encode_section_info (obj, x, true);
3621     }
3622   else
3623     {
3624       x = gen_raw_REG (address_mode, (*regno)++);
3625       x = gen_rtx_MEM (DECL_MODE (obj), x);
3626       set_mem_addr_space (x, as);
3627     }
3628
3629   return x;
3630 }
3631
3632 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3633    walk_tree.  DATA contains the actual fake register number.  */
3634
3635 static tree
3636 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3637 {
3638   tree obj = NULL_TREE;
3639   rtx x = NULL_RTX;
3640   int *regno = (int *) data;
3641
3642   switch (TREE_CODE (*expr_p))
3643     {
3644     case ADDR_EXPR:
3645       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3646            handled_component_p (*expr_p);
3647            expr_p = &TREE_OPERAND (*expr_p, 0))
3648         continue;
3649       obj = *expr_p;
3650       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3651         x = produce_memory_decl_rtl (obj, regno);
3652       break;
3653
3654     case SSA_NAME:
3655       *ws = 0;
3656       obj = SSA_NAME_VAR (*expr_p);
3657       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3658       if (!obj)
3659         return NULL_TREE;
3660       if (!DECL_RTL_SET_P (obj))
3661         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3662       break;
3663
3664     case VAR_DECL:
3665     case PARM_DECL:
3666     case RESULT_DECL:
3667       *ws = 0;
3668       obj = *expr_p;
3669
3670       if (DECL_RTL_SET_P (obj))
3671         break;
3672
3673       if (DECL_MODE (obj) == BLKmode)
3674         x = produce_memory_decl_rtl (obj, regno);
3675       else
3676         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3677
3678       break;
3679
3680     default:
3681       break;
3682     }
3683
3684   if (x)
3685     {
3686       decl_rtl_to_reset.safe_push (obj);
3687       SET_DECL_RTL (obj, x);
3688     }
3689
3690   return NULL_TREE;
3691 }
3692
3693 /* Determines cost of the computation of EXPR.  */
3694
3695 static unsigned
3696 computation_cost (tree expr, bool speed)
3697 {
3698   rtx_insn *seq;
3699   rtx rslt;
3700   tree type = TREE_TYPE (expr);
3701   unsigned cost;
3702   /* Avoid using hard regs in ways which may be unsupported.  */
3703   int regno = LAST_VIRTUAL_REGISTER + 1;
3704   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3705   enum node_frequency real_frequency = node->frequency;
3706
3707   node->frequency = NODE_FREQUENCY_NORMAL;
3708   crtl->maybe_hot_insn_p = speed;
3709   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3710   start_sequence ();
3711   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3712   seq = get_insns ();
3713   end_sequence ();
3714   default_rtl_profile ();
3715   node->frequency = real_frequency;
3716
3717   cost = seq_cost (seq, speed);
3718   if (MEM_P (rslt))
3719     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3720                           TYPE_ADDR_SPACE (type), speed);
3721   else if (!REG_P (rslt))
3722     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3723
3724   return cost;
3725 }
3726
3727 /* Returns variable containing the value of candidate CAND at statement AT.  */
3728
3729 static tree
3730 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3731 {
3732   if (stmt_after_increment (loop, cand, stmt))
3733     return cand->var_after;
3734   else
3735     return cand->var_before;
3736 }
3737
3738 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3739    same precision that is at least as wide as the precision of TYPE, stores
3740    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3741    type of A and B.  */
3742
3743 static tree
3744 determine_common_wider_type (tree *a, tree *b)
3745 {
3746   tree wider_type = NULL;
3747   tree suba, subb;
3748   tree atype = TREE_TYPE (*a);
3749
3750   if (CONVERT_EXPR_P (*a))
3751     {
3752       suba = TREE_OPERAND (*a, 0);
3753       wider_type = TREE_TYPE (suba);
3754       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3755         return atype;
3756     }
3757   else
3758     return atype;
3759
3760   if (CONVERT_EXPR_P (*b))
3761     {
3762       subb = TREE_OPERAND (*b, 0);
3763       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3764         return atype;
3765     }
3766   else
3767     return atype;
3768
3769   *a = suba;
3770   *b = subb;
3771   return wider_type;
3772 }
3773
3774 /* Determines the expression by that USE is expressed from induction variable
3775    CAND at statement AT in LOOP.  The expression is stored in two parts in a
3776    decomposed form.  The invariant part is stored in AFF_INV; while variant
3777    part in AFF_VAR.  Store ratio of CAND.step over USE.step in PRAT if it's
3778    non-null.  Returns false if USE cannot be expressed using CAND.  */
3779
3780 static bool
3781 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3782                        struct iv_cand *cand, struct aff_tree *aff_inv,
3783                        struct aff_tree *aff_var, widest_int *prat = NULL)
3784 {
3785   tree ubase = use->iv->base, ustep = use->iv->step;
3786   tree cbase = cand->iv->base, cstep = cand->iv->step;
3787   tree common_type, uutype, var, cstep_common;
3788   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3789   aff_tree aff_cbase;
3790   widest_int rat;
3791
3792   /* We must have a precision to express the values of use.  */
3793   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3794     return false;
3795
3796   var = var_at_stmt (loop, cand, at);
3797   uutype = unsigned_type_for (utype);
3798
3799   /* If the conversion is not noop, perform it.  */
3800   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3801     {
3802       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3803           && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3804         {
3805           tree inner_base, inner_step, inner_type;
3806           inner_base = TREE_OPERAND (cbase, 0);
3807           if (CONVERT_EXPR_P (cstep))
3808             inner_step = TREE_OPERAND (cstep, 0);
3809           else
3810             inner_step = cstep;
3811
3812           inner_type = TREE_TYPE (inner_base);
3813           /* If candidate is added from a biv whose type is smaller than
3814              ctype, we know both candidate and the biv won't overflow.
3815              In this case, it's safe to skip the convertion in candidate.
3816              As an example, (unsigned short)((unsigned long)A) equals to
3817              (unsigned short)A, if A has a type no larger than short.  */
3818           if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3819             {
3820               cbase = inner_base;
3821               cstep = inner_step;
3822             }
3823         }
3824       cbase = fold_convert (uutype, cbase);
3825       cstep = fold_convert (uutype, cstep);
3826       var = fold_convert (uutype, var);
3827     }
3828
3829   /* Ratio is 1 when computing the value of biv cand by itself.
3830      We can't rely on constant_multiple_of in this case because the
3831      use is created after the original biv is selected.  The call
3832      could fail because of inconsistent fold behavior.  See PR68021
3833      for more information.  */
3834   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3835     {
3836       gcc_assert (is_gimple_assign (use->stmt));
3837       gcc_assert (use->iv->ssa_name == cand->var_after);
3838       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3839       rat = 1;
3840     }
3841   else if (!constant_multiple_of (ustep, cstep, &rat))
3842     return false;
3843
3844   if (prat)
3845     *prat = rat;
3846
3847   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3848      type, we achieve better folding by computing their difference in this
3849      wider type, and cast the result to UUTYPE.  We do not need to worry about
3850      overflows, as all the arithmetics will in the end be performed in UUTYPE
3851      anyway.  */
3852   common_type = determine_common_wider_type (&ubase, &cbase);
3853
3854   /* use = ubase - ratio * cbase + ratio * var.  */
3855   tree_to_aff_combination (ubase, common_type, aff_inv);
3856   tree_to_aff_combination (cbase, common_type, &aff_cbase);
3857   tree_to_aff_combination (var, uutype, aff_var);
3858
3859   /* We need to shift the value if we are after the increment.  */
3860   if (stmt_after_increment (loop, cand, at))
3861     {
3862       aff_tree cstep_aff;
3863
3864       if (common_type != uutype)
3865         cstep_common = fold_convert (common_type, cstep);
3866       else
3867         cstep_common = cstep;
3868
3869       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3870       aff_combination_add (&aff_cbase, &cstep_aff);
3871     }
3872
3873   aff_combination_scale (&aff_cbase, -rat);
3874   aff_combination_add (aff_inv, &aff_cbase);
3875   if (common_type != uutype)
3876     aff_combination_convert (aff_inv, uutype);
3877
3878   aff_combination_scale (aff_var, rat);
3879   return true;
3880 }
3881
3882 /* Determines the expression by that USE is expressed from induction variable
3883    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3884    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3885
3886 static bool
3887 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3888                      struct iv_cand *cand, struct aff_tree *aff)
3889 {
3890   aff_tree aff_var;
3891
3892   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3893     return false;
3894
3895   aff_combination_add (aff, &aff_var);
3896   return true;
3897 }
3898
3899 /* Return the type of USE.  */
3900
3901 static tree
3902 get_use_type (struct iv_use *use)
3903 {
3904   tree base_type = TREE_TYPE (use->iv->base);
3905   tree type;
3906
3907   if (use->type == USE_ADDRESS)
3908     {
3909       /* The base_type may be a void pointer.  Create a pointer type based on
3910          the mem_ref instead.  */
3911       type = build_pointer_type (TREE_TYPE (*use->op_p));
3912       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3913                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3914     }
3915   else
3916     type = base_type;
3917
3918   return type;
3919 }
3920
3921 /* Determines the expression by that USE is expressed from induction variable
3922    CAND at statement AT in LOOP.  The computation is unshared.  */
3923
3924 static tree
3925 get_computation_at (struct loop *loop, gimple *at,
3926                     struct iv_use *use, struct iv_cand *cand)
3927 {
3928   aff_tree aff;
3929   tree type = get_use_type (use);
3930
3931   if (!get_computation_aff (loop, at, use, cand, &aff))
3932     return NULL_TREE;
3933   unshare_aff_combination (&aff);
3934   return fold_convert (type, aff_combination_to_tree (&aff));
3935 }
3936
3937 /* Adjust the cost COST for being in loop setup rather than loop body.
3938    If we're optimizing for space, the loop setup overhead is constant;
3939    if we're optimizing for speed, amortize it over the per-iteration cost.
3940    If ROUND_UP_P is true, the result is round up rather than to zero when
3941    optimizing for speed.  */
3942 static unsigned
3943 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
3944                    bool round_up_p = false)
3945 {
3946   if (cost == INFTY)
3947     return cost;
3948   else if (optimize_loop_for_speed_p (data->current_loop))
3949     {
3950       HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
3951       return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
3952     }
3953   else
3954     return cost;
3955 }
3956
3957 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3958    EXPR operand holding the shift.  COST0 and COST1 are the costs for
3959    calculating the operands of EXPR.  Returns true if successful, and returns
3960    the cost in COST.  */
3961
3962 static bool
3963 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
3964                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3965 {
3966   comp_cost res;
3967   tree op1 = TREE_OPERAND (expr, 1);
3968   tree cst = TREE_OPERAND (mult, 1);
3969   tree multop = TREE_OPERAND (mult, 0);
3970   int m = exact_log2 (int_cst_value (cst));
3971   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3972   int as_cost, sa_cost;
3973   bool mult_in_op1;
3974
3975   if (!(m >= 0 && m < maxm))
3976     return false;
3977
3978   STRIP_NOPS (op1);
3979   mult_in_op1 = operand_equal_p (op1, mult, 0);
3980
3981   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3982
3983   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3984      use that in preference to a shift insn followed by an add insn.  */
3985   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3986              ? shiftadd_cost (speed, mode, m)
3987              : (mult_in_op1
3988                 ? shiftsub1_cost (speed, mode, m)
3989                 : shiftsub0_cost (speed, mode, m)));
3990
3991   res = comp_cost (MIN (as_cost, sa_cost), 0);
3992   res += (mult_in_op1 ? cost0 : cost1);
3993
3994   STRIP_NOPS (multop);
3995   if (!is_gimple_val (multop))
3996     res += force_expr_to_var_cost (multop, speed);
3997
3998   *cost = res;
3999   return true;
4000 }
4001
/* Estimates cost of forcing expression EXPR into a variable.  SPEED selects
   which cost variant is wanted; it is used directly as an index into the
   two-element cost tables below (index 0 is dumped as "size", index 1 as
   "speed").

   SSA variables cost nothing, invariants are priced from tables that are
   filled in on the first call, and a fixed set of unary and binary
   operations is priced as the cost of the operation plus the recursively
   computed cost of its operands.  Any other expression is charged
   target_spill_cost.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  /* Cost tables computed once, on the first call.  Note that the local
     ADDRESS_COST array hides the function of the same name inside this
     function.  */
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  machine_mode mode;
  scalar_int_mode int_mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      /* Build a static integer variable with memory RTL so that its address
         can be used as a sample symbol.  */
      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);


      /* Measure three sample expressions for each variant: the integer
         constant 2000, the address of the test variable, and that address
         plus 2000.  The two address-based costs are bumped by one.  */
      for (i = 0; i < 2; i++)
        {
          integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
                                                             2000), i);

          symbol_cost[i] = computation_cost (addr, i) + 1;

          address_cost[i]
            = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
              fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
              fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
              fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
              fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
              fprintf (dump_file, "\n");
            }
        }

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  /* A value that already lives in a variable is free.  */
  if (SSA_VAR_P (expr))
    return no_cost;

  /* Invariants are priced from the tables: integer constants, addresses of
     variables, parameters and results (symbol cost), and every other
     invariant (address cost).  */
  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
        return comp_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
        {
          tree obj = TREE_OPERAND (expr, 0);

          if (VAR_P (obj)
              || TREE_CODE (obj) == PARM_DECL
              || TREE_CODE (obj) == RESULT_DECL)
            return comp_cost (symbol_cost [speed], 0);
        }

      return comp_cost (address_cost [speed], 0);
    }

  /* First pass over the expression code: pick out the operands of the
     operations we know how to price, with no-op conversions stripped.  */
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
    case TRUNC_DIV_EXPR:
    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);
      break;

    CASE_CONVERT:
    case NEGATE_EXPR:
    case BIT_NOT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      /* Unary operation: there is no second operand to price.  */
      op1 = NULL_TREE;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return comp_cost (target_spill_cost[speed], 0);
    }

  /* Price the operands.  Missing operands, SSA names and constants are
     free here; anything else is priced recursively.  */
  if (op0 == NULL_TREE
      || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
    cost0 = no_cost;
  else
    cost0 = force_expr_to_var_cost (op0, speed);

  if (op1 == NULL_TREE
      || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
    cost1 = no_cost;
  else
    cost1 = force_expr_to_var_cost (op1, speed);

  /* Second pass: price the operation itself in the mode of EXPR.  The case
     list must cover exactly the codes accepted by the first switch.  */
  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = comp_cost (add_cost (speed, mode), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
        {
          /* If one operand is a multiplication by a constant in a scalar
             integer mode, try to price the whole expression as a
             shift-and-add.  On success that cost already includes the
             operand costs, so it is returned directly.  */
          tree mult = NULL_TREE;
          comp_cost sa_cost;
          if (TREE_CODE (op1) == MULT_EXPR)
            mult = op1;
          else if (TREE_CODE (op0) == MULT_EXPR)
            mult = op0;

          if (mult != NULL_TREE
              && is_a <scalar_int_mode> (mode, &int_mode)
              && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
              && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
                                    speed, &sa_cost))
            return sa_cost;
        }
      break;

    CASE_CONVERT:
      {
        /* Despite their names these two hold types; the modes are taken
           from them in the convert_cost call.  */
        tree inner_mode, outer_mode;
        outer_mode = TREE_TYPE (expr);
        inner_mode = TREE_TYPE (op0);
        cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
                                       TYPE_MODE (inner_mode), speed), 0);
      }
      break;

    case MULT_EXPR:
      /* Only multiplication by a constant is priced precisely.  For any
         other multiplication target_spill_cost is returned directly, without
         the operand costs.  */
      if (cst_and_fits_in_hwi (op0))
        cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
                                             mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
        cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
                                             mode, speed), 0);
      else
        return comp_cost (target_spill_cost [speed], 0);
      break;

    case TRUNC_DIV_EXPR:
      /* Division by power of two is usually cheap, so we allow it.  Forbid
         anything else.  */
      if (integer_pow2p (TREE_OPERAND (expr, 1)))
        cost = comp_cost (add_cost (speed, mode), 0);
      else
        cost = comp_cost (target_spill_cost[speed], 0);
      break;

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_NOT_EXPR:
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      /* Bitwise operations and shifts are priced like an addition.  */
      cost = comp_cost (add_cost (speed, mode), 0);
      break;

    default:
      gcc_unreachable ();
    }

  /* Total is the operation plus both operands.  */
  cost += cost0;
  cost += cost1;
  return cost;
}
4191
4192 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4193    invariants the computation depends on.  */
4194
4195 static comp_cost
4196 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4197 {
4198   if (!expr)
4199     return no_cost;
4200
4201   find_inv_vars (data, &expr, inv_vars);
4202   return force_expr_to_var_cost (expr, data->speed);
4203 }
4204
/* Returns the cost of an auto-modifying address expression of the shape
   base + offset.  AINC_STEP is the step size of the address IV.  AINC_OFFSET
   is the offset of the address expression.  The address expression has
   ADDR_MODE in address space AS.  The memory access has MEM_MODE.  SPEED is
   true when optimizing for speed and false when optimizing for size.  */
4210
/* Kinds of auto-modifying address forms.  The enumerators are also used as
   indices into ainc_cost_data::costs, which is why AINC_NONE comes last and
   gives that array its size.  */
enum ainc_type
{
  AINC_PRE_INC,         /* Pre increment (costed with a PRE_INC address).  */
  AINC_PRE_DEC,         /* Pre decrement (costed with a PRE_DEC address).  */
  AINC_POST_INC,        /* Post increment (costed with a POST_INC address).  */
  AINC_POST_DEC,        /* Post decrement (costed with a POST_DEC address).  */
  AINC_NONE             /* Also the number of auto increment types.  */
};
4219
/* Costs of the auto-modifying address forms, indexed by ainc_type.
   get_address_cost_ainc sets an entry to INFTY when it does not obtain a
   cost for that form.  */
struct ainc_cost_data
{
  unsigned costs[AINC_NONE];
};
4224
4225 static comp_cost
4226 get_address_cost_ainc (HOST_WIDE_INT ainc_step, HOST_WIDE_INT ainc_offset,
4227                        machine_mode addr_mode, machine_mode mem_mode,
4228                        addr_space_t as, bool speed)
4229 {
4230   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4231       && !USE_STORE_PRE_DECREMENT (mem_mode)
4232       && !USE_LOAD_POST_DECREMENT (mem_mode)
4233       && !USE_STORE_POST_DECREMENT (mem_mode)
4234       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4235       && !USE_STORE_PRE_INCREMENT (mem_mode)
4236       && !USE_LOAD_POST_INCREMENT (mem_mode)
4237       && !USE_STORE_POST_INCREMENT (mem_mode))
4238     return infinite_cost;
4239
4240   static vec<ainc_cost_data *> ainc_cost_data_list;
4241   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4242   if (idx >= ainc_cost_data_list.length ())
4243     {
4244       unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4245
4246       gcc_assert (nsize > idx);
4247       ainc_cost_data_list.safe_grow_cleared (nsize);
4248     }
4249
4250   ainc_cost_data *data = ainc_cost_data_list[idx];
4251   if (data == NULL)
4252     {
4253       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4254
4255       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4256       data->costs[AINC_PRE_DEC] = INFTY;
4257       data->costs[AINC_POST_DEC] = INFTY;
4258       data->costs[AINC_PRE_INC] = INFTY;
4259       data->costs[AINC_POST_INC] = INFTY;
4260       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4261           || USE_STORE_PRE_DECREMENT (mem_mode))
4262         {
4263           rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4264
4265           if (memory_address_addr_space_p (mem_mode, addr, as))
4266             data->costs[AINC_PRE_DEC]
4267               = address_cost (addr, mem_mode, as, speed);
4268         }
4269       if (USE_LOAD_POST_DECREMENT (mem_mode)
4270           || USE_STORE_POST_DECREMENT (mem_mode))
4271         {
4272           rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4273
4274           if (memory_address_addr_space_p (mem_mode, addr, as))
4275             data->costs[AINC_POST_DEC]
4276               = address_cost (addr, mem_mode, as, speed);
4277         }
4278       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4279           || USE_STORE_PRE_INCREMENT (mem_mode))
4280         {
4281           rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4282
4283           if (memory_address_addr_space_p (mem_mode, addr, as))
4284             data->costs[AINC_PRE_INC]
4285               = address_cost (addr, mem_mode, as, speed);
4286         }
4287       if (USE_LOAD_POST_INCREMENT (mem_mode)
4288           || USE_STORE_POST_INCREMENT (mem_mode))
4289         {
4290           rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4291
4292           if (memory_address_addr_space_p (mem_mode, addr, as))
4293             data->costs[AINC_POST_INC]
4294               = address_cost (addr, mem_mode, as, speed);
4295         }
4296       ainc_cost_data_list[idx] = data;
4297     }
4298
4299   HOST_WIDE_INT msize = GET_MODE_SIZE (mem_mode);
4300   if (ainc_offset == 0 && msize == ainc_step)
4301     return comp_cost (data->costs[AINC_POST_INC], 0);
4302   if (ainc_offset == 0 && msize == -ainc_step)
4303     return comp_cost (data->costs[AINC_POST_DEC], 0);
4304   if (ainc_offset == msize && msize == ainc_step)
4305     return comp_cost (data->costs[AINC_PRE_INC], 0);
4306   if (ainc_offset == -msize && msize == -ainc_step)
4307     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4308
4309   return infinite_cost;
4310 }
4311
/* Return cost of computing USE's address expression by using CAND.
   AFF_INV and AFF_VAR represent invariant and variant parts of the
   address expression, respectively.  If AFF_INV is simple, store
   the loop invariant variables which are depended by it in INV_VARS;
   if AFF_INV is complicated, handle it as a new invariant expression
   and record it in INV_EXPR.  RATIO indicates multiple times between
   steps of USE and CAND.  If CAN_AUTOINC is nonNULL, store boolean
   value to it indicating if this is an auto-increment address.  SPEED
   selects between the speed and the size cost.

   The function probes valid_mem_ref_p with progressively richer address
   shapes to find out how much of the computation the addressing mode can
   absorb.  Whatever cannot be absorbed is charged separately: the invariant
   part through force_var_cost, plus a multiplication and/or an addition
   for the variant part.  AFF_INV is modified along the way: an offset that
   fits the addressing mode is zeroed, and a fixed address may be moved into
   the symbol part.  */

static comp_cost
get_address_cost (struct ivopts_data *data, struct iv_use *use,
                  struct iv_cand *cand, aff_tree *aff_inv,
                  aff_tree *aff_var, HOST_WIDE_INT ratio,
                  bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
                  bool *can_autoinc, bool speed)
{
  rtx addr;
  bool simple_inv = true;
  tree comp_inv = NULL_TREE, type = aff_var->type;
  comp_cost var_cost = no_cost, cost = no_cost;
  /* PARTS describes the shape of the address being tried.  Presumably the
     second initializer is the base field, given that the code below treats
     a base as already present; integer_one_node appears to act only as a
     placeholder operand -- confirm against struct mem_address.  */
  struct mem_address parts = {NULL_TREE, integer_one_node,
                              NULL_TREE, NULL_TREE, NULL_TREE};
  machine_mode addr_mode = TYPE_MODE (type);
  machine_mode mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));

  if (!aff_combination_const_p (aff_inv))
    {
      /* The invariant part is more than a constant, so it needs an operand
         of its own next to the variant part.  */
      parts.index = integer_one_node;
      /* Addressing mode "base + index".  */
      if (valid_mem_ref_p (mem_mode, as, &parts))
        {
          parts.step = wide_int_to_tree (type, ratio);
          /* Addressing mode "base + index << scale".  */
          if (ratio != 1 && !valid_mem_ref_p (mem_mode, as, &parts))
            parts.step = NULL_TREE;

          if (aff_inv->offset != 0)
            {
              parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
              /* Addressing mode "base + index [<< scale] + offset".  */
              if (!valid_mem_ref_p (mem_mode, as, &parts))
                parts.offset = NULL_TREE;
              else
                /* The offset is handled by the addressing mode, so it is
                   removed from the invariant part.  */
                aff_inv->offset = 0;
            }

          move_fixed_address_to_symbol (&parts, aff_inv);
          /* Base is fixed address and is moved to symbol part.  */
          if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
            parts.base = NULL_TREE;

          /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
          if (parts.symbol != NULL_TREE
              && !valid_mem_ref_p (mem_mode, as, &parts))
            {
              /* The symbol does not fit the addressing mode: put it back
                 into the invariant part.  */
              aff_combination_add_elt (aff_inv, parts.symbol, 1);
              parts.symbol = NULL_TREE;
              /* Reset SIMPLE_INV since symbol address needs to be computed
                 outside of address expression in this case.  */
              simple_inv = false;
              /* Symbol part is moved back to base part, it can't be NULL.  */
              parts.base = integer_one_node;
            }
        }
      else
        /* "base + index" is not available; the sum has to be computed
           separately (charged below via PARTS.INDEX being null).  */
        parts.index = NULL_TREE;
    }
  else
    {
      /* The invariant part is a constant.  Try an auto-increment address
         first; this is only attempted when the caller asked for it, the
         ratio is one and the candidate step is a constant fitting a
         HOST_WIDE_INT.  */
      if (can_autoinc && ratio == 1 && cst_and_fits_in_hwi (cand->iv->step))
        {
          HOST_WIDE_INT ainc_step = int_cst_value (cand->iv->step);
          HOST_WIDE_INT ainc_offset = (aff_inv->offset).to_shwi ();

          /* After the increment the candidate is one step further on.  */
          if (stmt_after_increment (data->current_loop, cand, use->stmt))
            ainc_offset += ainc_step;
          cost = get_address_cost_ainc (ainc_step, ainc_offset,
                                        addr_mode, mem_mode, as, speed);
          if (!cost.infinite_cost_p ())
            {
              *can_autoinc = true;
              return cost;
            }
          /* No auto-increment form applies; start over from zero.  */
          cost = no_cost;
        }
      if (!aff_combination_zero_p (aff_inv))
        {
          parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
          /* Addressing mode "base + offset".  */
          if (!valid_mem_ref_p (mem_mode, as, &parts))
            parts.offset = NULL_TREE;
          else
            aff_inv->offset = 0;
        }
    }

  /* NOTE(review): AFF_INV has already been passed to
     aff_combination_const_p above and is dereferenced on several paths, so
     the NULL test below looks redundant -- confirm before removing.  */
  if (simple_inv)
    simple_inv = (aff_inv == NULL
                  || aff_combination_const_p (aff_inv)
                  || aff_combination_singleton_var_p (aff_inv));
  /* Whatever is left in AFF_INV must be computed outside the address.  */
  if (!aff_combination_zero_p (aff_inv))
    comp_inv = aff_combination_to_tree (aff_inv);
  if (comp_inv != NULL_TREE)
    cost = force_var_cost (data, comp_inv, inv_vars);
  /* The scale was not absorbed by the address: charge a multiplication.  */
  if (ratio != 1 && parts.step == NULL_TREE)
    var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
  /* The invariant was not absorbed as a separate operand: charge an
     addition.  */
  if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
    var_cost += add_cost (speed, addr_mode);

  if (comp_inv && inv_expr && !simple_inv)
    {
      *inv_expr = get_loop_invariant_expr (data, comp_inv);
      /* Clear depends on.  */
      if (*inv_expr != NULL && inv_vars && *inv_vars)
        bitmap_clear (*inv_vars);

      /* Cost of small invariant expression adjusted against loop niters
         is usually zero, which makes it difficult to be differentiated
         from candidate based on loop invariant variables.  Secondly, the
         generated invariant expression may not be hoisted out of loop by
         following pass.  We penalize the cost by rounding up in order to
         neutralize such effects.  */
      cost.cost = adjust_setup_cost (data, cost.cost, true);
      /* Remember the setup portion in the scratch field.  */
      cost.scratch = cost.cost;
    }

  cost += var_cost;
  /* Build the address for the chosen shape and add its cost.  By
     construction the shape must be valid.  */
  addr = addr_for_mem_ref (&parts, as, false);
  gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
  cost += address_cost (addr, mem_mode, as, speed);

  /* Complexity grows by one for each of: a symbol, a step other than one,
     having both base and index, and a nonzero offset.  */
  if (parts.symbol != NULL_TREE)
    cost.complexity += 1;
  if (parts.step != NULL_TREE && !integer_onep (parts.step))
    cost.complexity += 1;
  if (parts.base != NULL_TREE && parts.index != NULL_TREE)
    cost.complexity += 1;
  if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
    cost.complexity += 1;

  return cost;
}
4455
4456 /* Scale (multiply) the computed COST (except scratch part that should be
4457    hoisted out a loop) by header->frequency / AT->frequency, which makes
4458    expected cost more accurate.  */
4459
4460 static comp_cost
4461 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4462 {
4463    int loop_freq = data->current_loop->header->frequency;
4464    int bb_freq = gimple_bb (at)->frequency;
4465    if (loop_freq != 0)
4466      {
4467        gcc_assert (cost.scratch <= cost.cost);
4468        int scaled_cost
4469          = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4470
4471        if (dump_file && (dump_flags & TDF_DETAILS))
4472          fprintf (dump_file, "Scaling cost based on bb prob "
4473                   "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4474                   1.0f * bb_freq / loop_freq, cost.cost,
4475                   cost.scratch, scaled_cost, bb_freq, loop_freq);
4476
4477        cost.cost = scaled_cost;
4478      }
4479
4480   return cost;
4481 }
4482
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in INV_VARS.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  If INV_EXPR is nonnull, record invariant
   expr entry in it.

   Returns infinite_cost when USE cannot be based on CAND: the type of USE
   is wider than that of CAND, the two are based on different objects,
   get_computation_aff_1 fails, or the step ratio does not fit a signed
   HOST_WIDE_INT.  Otherwise the cost is scaled by
   get_scaled_computation_cost_at for the block containing the use.  */

static comp_cost
get_computation_cost (struct ivopts_data *data, struct iv_use *use,
                      struct iv_cand *cand, bool address_p, bitmap *inv_vars,
                      bool *can_autoinc, iv_inv_expr_ent **inv_expr)
{
  gimple *at = use->stmt;
  tree ubase = use->iv->base, cbase = cand->iv->base;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree comp_inv = NULL_TREE;
  HOST_WIDE_INT ratio, aratio;
  comp_cost cost;
  widest_int rat;
  aff_tree aff_inv, aff_var;
  /* Speed versus size is decided per basic block of the use.  */
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));

  /* Give all optional outputs a defined value, including on the early
     returns below.  */
  if (inv_vars)
    *inv_vars = NULL;
  if (can_autoinc)
    *can_autoinc = false;
  if (inv_expr)
    *inv_expr = NULL;

  /* Check if we have enough precision to express the values of use.  */
  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    return infinite_cost;

  if (address_p
      || (use->iv->base_object
          && cand->iv->base_object
          && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
          && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
    {
      /* Do not try to express address of an object with computation based
         on address of a different object.  This may cause problems in rtl
         level alias analysis (that does not expect this to be happening,
         as this is illegal in C), and would be unlikely to be useful
         anyway.  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return infinite_cost;
    }

  /* Split the use into an invariant part (AFF_INV) and the candidate
     variable scaled by RAT (AFF_VAR).  */
  if (!get_computation_aff_1 (data->current_loop, at, use,
                              cand, &aff_inv, &aff_var, &rat)
      || !wi::fits_shwi_p (rat))
    return infinite_cost;

  ratio = rat.to_shwi ();
  if (address_p)
    {
      /* Address uses are costed by what the addressing modes can do.  */
      cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
                               inv_vars, inv_expr, can_autoinc, speed);
      return get_scaled_computation_cost_at (data, at, cost);
    }

  /* The invariant part counts as simple when it is a constant or a single
     variable; this is decided before the conversion below.  */
  bool simple_inv = (aff_combination_const_p (&aff_inv)
                     || aff_combination_singleton_var_p (&aff_inv));
  /* Convert the invariant part to the signed variant of its type before
     turning it into a tree.  */
  tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
  aff_combination_convert (&aff_inv, signed_type);
  if (!aff_combination_zero_p (&aff_inv))
    comp_inv = aff_combination_to_tree (&aff_inv);

  cost = force_var_cost (data, comp_inv, inv_vars);
  if (comp_inv && inv_expr && !simple_inv)
    {
      *inv_expr = get_loop_invariant_expr (data, comp_inv);
      /* Clear depends on.  */
      if (*inv_expr != NULL && inv_vars && *inv_vars)
        bitmap_clear (*inv_vars);

      /* The invariant is computed in the loop setup, so its cost is
         adjusted accordingly (truncating, unlike the address case).  */
      cost.cost = adjust_setup_cost (data, cost.cost);
      /* Record setup cost in scratch field.  */
      cost.scratch = cost.cost;
    }
  /* Cost of constant integer can be covered when adding invariant part to
     variant part.  */
  else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
    cost = no_cost;

  /* Need type narrowing to represent use with cand.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      machine_mode outer_mode = TYPE_MODE (utype);
      machine_mode inner_mode = TYPE_MODE (ctype);
      cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
    }

  /* Turn a + i * (-c) into a - i * c.  */
  /* NOTE(review): RATIO is only known to fit a signed HOST_WIDE_INT, so
     -RATIO would overflow if RATIO were the minimum value -- confirm that
     this cannot occur.  */
  if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
    aratio = -ratio;
  else
    aratio = ratio;

  /* A ratio other than one requires multiplying the candidate.  */
  if (ratio != 1)
    cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);

  /* TODO: We may also need to check if we can compute  a + i * 4 in one
     instruction.  */
  /* Need to add up the invariant and variant parts.  */
  if (comp_inv && !integer_zerop (comp_inv))
    cost += add_cost (speed, TYPE_MODE (utype));

  return get_scaled_computation_cost_at (data, at, cost);
}
4596
4597 /* Determines cost of computing the use in GROUP with CAND in a generic
4598    expression.  */
4599
4600 static bool
4601 determine_group_iv_cost_generic (struct ivopts_data *data,
4602                                  struct iv_group *group, struct iv_cand *cand)
4603 {
4604   comp_cost cost;
4605   iv_inv_expr_ent *inv_expr = NULL;
4606   bitmap inv_vars = NULL, inv_exprs = NULL;
4607   struct iv_use *use = group->vuses[0];
4608
4609   /* The simple case first -- if we need to express value of the preserved
4610      original biv, the cost is 0.  This also prevents us from counting the
4611      cost of increment twice -- once at this use and once in the cost of
4612      the candidate.  */
4613   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4614     cost = no_cost;
4615   else
4616     cost = get_computation_cost (data, use, cand, false,
4617                                  &inv_vars, NULL, &inv_expr);
4618
4619   if (inv_expr)
4620     {
4621       inv_exprs = BITMAP_ALLOC (NULL);
4622       bitmap_set_bit (inv_exprs, inv_expr->id);
4623     }
4624   set_group_iv_cost (data, group, cand, cost, inv_vars,
4625                      NULL_TREE, ERROR_MARK, inv_exprs);
4626   return !cost.infinite_cost_p ();
4627 }
4628
4629 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
4630
4631 static bool
4632 determine_group_iv_cost_address (struct ivopts_data *data,
4633                                  struct iv_group *group, struct iv_cand *cand)
4634 {
4635   unsigned i;
4636   bitmap inv_vars = NULL, inv_exprs = NULL;
4637   bool can_autoinc;
4638   iv_inv_expr_ent *inv_expr = NULL;
4639   struct iv_use *use = group->vuses[0];
4640   comp_cost sum_cost = no_cost, cost;
4641
4642   cost = get_computation_cost (data, use, cand, true,
4643                                &inv_vars, &can_autoinc, &inv_expr);
4644
4645   if (inv_expr)
4646     {
4647       inv_exprs = BITMAP_ALLOC (NULL);
4648       bitmap_set_bit (inv_exprs, inv_expr->id);
4649     }
4650   sum_cost = cost;
4651   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4652     {
4653       if (can_autoinc)
4654         sum_cost -= cand->cost_step;
4655       /* If we generated the candidate solely for exploiting autoincrement
4656          opportunities, and it turns out it can't be used, set the cost to
4657          infinity to make sure we ignore it.  */
4658       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4659         sum_cost = infinite_cost;
4660     }
4661
4662   /* Uses in a group can share setup code, so only add setup cost once.  */
4663   cost -= cost.scratch;
4664   /* Compute and add costs for rest uses of this group.  */
4665   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4666     {
4667       struct iv_use *next = group->vuses[i];
4668
4669       /* TODO: We could skip computing cost for sub iv_use when it has the
4670          same cost as the first iv_use, but the cost really depends on the
4671          offset and where the iv_use is.  */
4672         cost = get_computation_cost (data, next, cand, true,
4673                                      NULL, &can_autoinc, &inv_expr);
4674         if (inv_expr)
4675           {
4676             if (!inv_exprs)
4677               inv_exprs = BITMAP_ALLOC (NULL);
4678
4679             bitmap_set_bit (inv_exprs, inv_expr->id);
4680           }
4681       sum_cost += cost;
4682     }
4683   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4684                      NULL_TREE, ERROR_MARK, inv_exprs);
4685
4686   return !sum_cost.infinite_cost_p ();
4687 }
4688
4689 /* Computes value of candidate CAND at position AT in iteration NITER, and
4690    stores it to VAL.  */
4691
4692 static void
4693 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4694                aff_tree *val)
4695 {
4696   aff_tree step, delta, nit;
4697   struct iv *iv = cand->iv;
4698   tree type = TREE_TYPE (iv->base);
4699   tree steptype;
4700   if (POINTER_TYPE_P (type))
4701     steptype = sizetype;
4702   else
4703     steptype = unsigned_type_for (type);
4704
4705   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4706   aff_combination_convert (&step, steptype);
4707   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4708   aff_combination_convert (&nit, steptype);
4709   aff_combination_mult (&nit, &step, &delta);
4710   if (stmt_after_increment (loop, cand, at))
4711     aff_combination_add (&delta, &step);
4712
4713   tree_to_aff_combination (iv->base, type, val);
4714   if (!POINTER_TYPE_P (type))
4715     aff_combination_convert (val, steptype);
4716   aff_combination_add (val, &delta);
4717 }
4718
4719 /* Returns period of induction variable iv.  */
4720
4721 static tree
4722 iv_period (struct iv *iv)
4723 {
4724   tree step = iv->step, period, type;
4725   tree pow2div;
4726
4727   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4728
4729   type = unsigned_type_for (TREE_TYPE (step));
4730   /* Period of the iv is lcm (step, type_range)/step -1,
4731      i.e., N*type_range/step - 1. Since type range is power
4732      of two, N == (step >> num_of_ending_zeros_binary (step),
4733      so the final result is
4734
4735        (type_range >> num_of_ending_zeros_binary (step)) - 1
4736
4737   */
4738   pow2div = num_ending_zeros (step);
4739
4740   period = build_low_bits_mask (type,
4741                                 (TYPE_PRECISION (type)
4742                                  - tree_to_uhwi (pow2div)));
4743
4744   return period;
4745 }
4746
4747 /* Returns the comparison operator used when eliminating the iv USE.  */
4748
4749 static enum tree_code
4750 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4751 {
4752   struct loop *loop = data->current_loop;
4753   basic_block ex_bb;
4754   edge exit;
4755
4756   ex_bb = gimple_bb (use->stmt);
4757   exit = EDGE_SUCC (ex_bb, 0);
4758   if (flow_bb_inside_loop_p (loop, exit->dest))
4759     exit = EDGE_SUCC (ex_bb, 1);
4760
4761   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4762 }
4763
4764 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4765    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4766    calculation is performed in non-wrapping type.
4767
4768    TODO: More generally, we could test for the situation that
4769          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4770          This would require knowing the sign of OFFSET.  */
4771
4772 static bool
4773 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4774 {
4775   enum tree_code code;
4776   tree e1, e2;
4777   aff_tree aff_e1, aff_e2, aff_offset;
4778
4779   if (!nowrap_type_p (TREE_TYPE (base)))
4780     return false;
4781
4782   base = expand_simple_operations (base);
4783
4784   if (TREE_CODE (base) == SSA_NAME)
4785     {
4786       gimple *stmt = SSA_NAME_DEF_STMT (base);
4787
4788       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4789         return false;
4790
4791       code = gimple_assign_rhs_code (stmt);
4792       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4793         return false;
4794
4795       e1 = gimple_assign_rhs1 (stmt);
4796       e2 = gimple_assign_rhs2 (stmt);
4797     }
4798   else
4799     {
4800       code = TREE_CODE (base);
4801       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4802         return false;
4803       e1 = TREE_OPERAND (base, 0);
4804       e2 = TREE_OPERAND (base, 1);
4805     }
4806
4807   /* Use affine expansion as deeper inspection to prove the equality.  */
4808   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4809                                   &aff_e2, &data->name_expansion_cache);
4810   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4811                                   &aff_offset, &data->name_expansion_cache);
4812   aff_combination_scale (&aff_offset, -1);
4813   switch (code)
4814     {
4815     case PLUS_EXPR:
4816       aff_combination_add (&aff_e2, &aff_offset);
4817       if (aff_combination_zero_p (&aff_e2))
4818         return true;
4819
4820       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4821                                       &aff_e1, &data->name_expansion_cache);
4822       aff_combination_add (&aff_e1, &aff_offset);
4823       return aff_combination_zero_p (&aff_e1);
4824
4825     case POINTER_PLUS_EXPR:
4826       aff_combination_add (&aff_e2, &aff_offset);
4827       return aff_combination_zero_p (&aff_e2);
4828
4829     default:
4830       return false;
4831     }
4832 }
4833
4834 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4835    comparison with CAND.  NITER describes the number of iterations of
4836    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4837
4838    We aim to handle the following situation:
4839
4840    sometype *base, *p;
4841    int a, b, i;
4842
4843    i = a;
4844    p = p_0 = base + a;
4845
4846    do
4847      {
4848        bla (*p);
4849        p++;
4850        i++;
4851      }
4852    while (i < b);
4853
4854    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4855    We aim to optimize this to
4856
4857    p = p_0 = base + a;
4858    do
4859      {
4860        bla (*p);
4861        p++;
4862      }
4863    while (p < p_0 - a + b);
4864
4865    This preserves the correctness, since the pointer arithmetics does not
4866    overflow.  More precisely:
4867
4868    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4869       overflow in computing it or the values of p.
4870    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4871       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4872
4873 static bool
4874 iv_elimination_compare_lt (struct ivopts_data *data,
4875                            struct iv_cand *cand, enum tree_code *comp_p,
4876                            struct tree_niter_desc *niter)
4877 {
4878   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4879   struct aff_tree nit, tmpa, tmpb;
4880   enum tree_code comp;
4881   HOST_WIDE_INT step;
4882
4883   /* We need to know that the candidate induction variable does not overflow.
4884      While more complex analysis may be used to prove this, for now just
4885      check that the variable appears in the original program and that it
4886      is computed in a type that guarantees no overflows.  */
4887   cand_type = TREE_TYPE (cand->iv->base);
4888   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4889     return false;
4890
4891   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4892      the calculation of the BOUND could overflow, making the comparison
4893      invalid.  */
4894   if (!data->loop_single_exit_p)
4895     return false;
4896
4897   /* We need to be able to decide whether candidate is increasing or decreasing
4898      in order to choose the right comparison operator.  */
4899   if (!cst_and_fits_in_hwi (cand->iv->step))
4900     return false;
4901   step = int_cst_value (cand->iv->step);
4902
4903   /* Check that the number of iterations matches the expected pattern:
4904      a + 1 > b ? 0 : b - a - 1.  */
4905   mbz = niter->may_be_zero;
4906   if (TREE_CODE (mbz) == GT_EXPR)
4907     {
4908       /* Handle a + 1 > b.  */
4909       tree op0 = TREE_OPERAND (mbz, 0);
4910       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4911         {
4912           a = TREE_OPERAND (op0, 0);
4913           b = TREE_OPERAND (mbz, 1);
4914         }
4915       else
4916         return false;
4917     }
4918   else if (TREE_CODE (mbz) == LT_EXPR)
4919     {
4920       tree op1 = TREE_OPERAND (mbz, 1);
4921
4922       /* Handle b < a + 1.  */
4923       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4924         {
4925           a = TREE_OPERAND (op1, 0);
4926           b = TREE_OPERAND (mbz, 0);
4927         }
4928       else
4929         return false;
4930     }
4931   else
4932     return false;
4933
4934   /* Expected number of iterations is B - A - 1.  Check that it matches
4935      the actual number, i.e., that B - A - NITER = 1.  */
4936   tree_to_aff_combination (niter->niter, nit_type, &nit);
4937   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4938   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4939   aff_combination_scale (&nit, -1);
4940   aff_combination_scale (&tmpa, -1);
4941   aff_combination_add (&tmpb, &tmpa);
4942   aff_combination_add (&tmpb, &nit);
4943   if (tmpb.n != 0 || tmpb.offset != 1)
4944     return false;
4945
4946   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4947      overflow.  */
4948   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4949                         cand->iv->step,
4950                         fold_convert (TREE_TYPE (cand->iv->step), a));
4951   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4952     return false;
4953
4954   /* Determine the new comparison operator.  */
4955   comp = step < 0 ? GT_EXPR : LT_EXPR;
4956   if (*comp_p == NE_EXPR)
4957     *comp_p = comp;
4958   else if (*comp_p == EQ_EXPR)
4959     *comp_p = invert_tree_comparison (comp, false);
4960   else
4961     gcc_unreachable ();
4962
4963   return true;
4964 }
4965
4966 /* Check whether it is possible to express the condition in USE by comparison
4967    of candidate CAND.  If so, store the value compared with to BOUND, and the
4968    comparison operator to COMP.  */
4969
4970 static bool
4971 may_eliminate_iv (struct ivopts_data *data,
4972                   struct iv_use *use, struct iv_cand *cand, tree *bound,
4973                   enum tree_code *comp)
4974 {
4975   basic_block ex_bb;
4976   edge exit;
4977   tree period;
4978   struct loop *loop = data->current_loop;
4979   aff_tree bnd;
4980   struct tree_niter_desc *desc = NULL;
4981
4982   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4983     return false;
4984
4985   /* For now works only for exits that dominate the loop latch.
4986      TODO: extend to other conditions inside loop body.  */
4987   ex_bb = gimple_bb (use->stmt);
4988   if (use->stmt != last_stmt (ex_bb)
4989       || gimple_code (use->stmt) != GIMPLE_COND
4990       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4991     return false;
4992
4993   exit = EDGE_SUCC (ex_bb, 0);
4994   if (flow_bb_inside_loop_p (loop, exit->dest))
4995     exit = EDGE_SUCC (ex_bb, 1);
4996   if (flow_bb_inside_loop_p (loop, exit->dest))
4997     return false;
4998
4999   desc = niter_for_exit (data, exit);
5000   if (!desc)
5001     return false;
5002
5003   /* Determine whether we can use the variable to test the exit condition.
5004      This is the case iff the period of the induction variable is greater
5005      than the number of iterations for which the exit condition is true.  */
5006   period = iv_period (cand->iv);
5007
5008   /* If the number of iterations is constant, compare against it directly.  */
5009   if (TREE_CODE (desc->niter) == INTEGER_CST)
5010     {
5011       /* See cand_value_at.  */
5012       if (stmt_after_increment (loop, cand, use->stmt))
5013         {
5014           if (!tree_int_cst_lt (desc->niter, period))
5015             return false;
5016         }
5017       else
5018         {
5019           if (tree_int_cst_lt (period, desc->niter))
5020             return false;
5021         }
5022     }
5023
5024   /* If not, and if this is the only possible exit of the loop, see whether
5025      we can get a conservative estimate on the number of iterations of the
5026      entire loop and compare against that instead.  */
5027   else
5028     {
5029       widest_int period_value, max_niter;
5030
5031       max_niter = desc->max;
5032       if (stmt_after_increment (loop, cand, use->stmt))
5033         max_niter += 1;
5034       period_value = wi::to_widest (period);
5035       if (wi::gtu_p (max_niter, period_value))
5036         {
5037           /* See if we can take advantage of inferred loop bound
5038              information.  */
5039           if (data->loop_single_exit_p)
5040             {
5041               if (!max_loop_iterations (loop, &max_niter))
5042                 return false;
5043               /* The loop bound is already adjusted by adding 1.  */
5044               if (wi::gtu_p (max_niter, period_value))
5045                 return false;
5046             }
5047           else
5048             return false;
5049         }
5050     }
5051
5052   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5053
5054   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5055                          aff_combination_to_tree (&bnd));
5056   *comp = iv_elimination_compare (data, use);
5057
5058   /* It is unlikely that computing the number of iterations using division
5059      would be more profitable than keeping the original induction variable.  */
5060   if (expression_expensive_p (*bound))
5061     return false;
5062
5063   /* Sometimes, it is possible to handle the situation that the number of
5064      iterations may be zero unless additional assumptions by using <
5065      instead of != in the exit condition.
5066
5067      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5068            base the exit condition on it.  However, that is often too
5069            expensive.  */
5070   if (!integer_zerop (desc->may_be_zero))
5071     return iv_elimination_compare_lt (data, cand, comp, desc);
5072
5073   return true;
5074 }
5075
5076  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5077     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5078
5079 static int
5080 parm_decl_cost (struct ivopts_data *data, tree bound)
5081 {
5082   tree sbound = bound;
5083   STRIP_NOPS (sbound);
5084
5085   if (TREE_CODE (sbound) == SSA_NAME
5086       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5087       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5088       && data->body_includes_call)
5089     return COSTS_N_INSNS (1);
5090
5091   return 0;
5092 }
5093
5094 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5095
5096 static bool
5097 determine_group_iv_cost_cond (struct ivopts_data *data,
5098                               struct iv_group *group, struct iv_cand *cand)
5099 {
5100   tree bound = NULL_TREE;
5101   struct iv *cmp_iv;
5102   bitmap inv_exprs = NULL;
5103   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5104   comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5105   enum comp_iv_rewrite rewrite_type;
5106   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5107   tree *control_var, *bound_cst;
5108   enum tree_code comp = ERROR_MARK;
5109   struct iv_use *use = group->vuses[0];
5110
5111   /* Extract condition operands.  */
5112   rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5113                                         &bound_cst, NULL, &cmp_iv);
5114   gcc_assert (rewrite_type != COMP_IV_NA);
5115
5116   /* Try iv elimination.  */
5117   if (rewrite_type == COMP_IV_ELIM
5118       && may_eliminate_iv (data, use, cand, &bound, &comp))
5119     {
5120       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5121       if (elim_cost.cost == 0)
5122         elim_cost.cost = parm_decl_cost (data, bound);
5123       else if (TREE_CODE (bound) == INTEGER_CST)
5124         elim_cost.cost = 0;
5125       /* If we replace a loop condition 'i < n' with 'p < base + n',
5126          inv_vars_elim will have 'base' and 'n' set, which implies that both
5127          'base' and 'n' will be live during the loop.    More likely,
5128          'base + n' will be loop invariant, resulting in only one live value
5129          during the loop.  So in that case we clear inv_vars_elim and set
5130          inv_expr_elim instead.  */
5131       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5132         {
5133           inv_expr_elim = get_loop_invariant_expr (data, bound);
5134           bitmap_clear (inv_vars_elim);
5135         }
5136       /* The bound is a loop invariant, so it will be only computed
5137          once.  */
5138       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5139     }
5140
5141   /* When the condition is a comparison of the candidate IV against
5142      zero, prefer this IV.
5143
5144      TODO: The constant that we're subtracting from the cost should
5145      be target-dependent.  This information should be added to the
5146      target costs for each backend.  */
5147   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5148       && integer_zerop (*bound_cst)
5149       && (operand_equal_p (*control_var, cand->var_after, 0)
5150           || operand_equal_p (*control_var, cand->var_before, 0)))
5151     elim_cost -= 1;
5152
5153   express_cost = get_computation_cost (data, use, cand, false,
5154                                        &inv_vars_express, NULL,
5155                                        &inv_expr_express);
5156   if (cmp_iv != NULL)
5157     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5158
5159   /* Count the cost of the original bound as well.  */
5160   bound_cost = force_var_cost (data, *bound_cst, NULL);
5161   if (bound_cost.cost == 0)
5162     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5163   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5164     bound_cost.cost = 0;
5165   express_cost += bound_cost;
5166
5167   /* Choose the better approach, preferring the eliminated IV. */
5168   if (elim_cost <= express_cost)
5169     {
5170       cost = elim_cost;
5171       inv_vars = inv_vars_elim;
5172       inv_vars_elim = NULL;
5173       inv_expr = inv_expr_elim;
5174     }
5175   else
5176     {
5177       cost = express_cost;
5178       inv_vars = inv_vars_express;
5179       inv_vars_express = NULL;
5180       bound = NULL_TREE;
5181       comp = ERROR_MARK;
5182       inv_expr = inv_expr_express;
5183     }
5184
5185   if (inv_expr)
5186     {
5187       inv_exprs = BITMAP_ALLOC (NULL);
5188       bitmap_set_bit (inv_exprs, inv_expr->id);
5189     }
5190   set_group_iv_cost (data, group, cand, cost,
5191                      inv_vars, bound, comp, inv_exprs);
5192
5193   if (inv_vars_elim)
5194     BITMAP_FREE (inv_vars_elim);
5195   if (inv_vars_express)
5196     BITMAP_FREE (inv_vars_express);
5197
5198   return !cost.infinite_cost_p ();
5199 }
5200
5201 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5202    if USE cannot be represented with CAND.  */
5203
5204 static bool
5205 determine_group_iv_cost (struct ivopts_data *data,
5206                          struct iv_group *group, struct iv_cand *cand)
5207 {
5208   switch (group->type)
5209     {
5210     case USE_NONLINEAR_EXPR:
5211       return determine_group_iv_cost_generic (data, group, cand);
5212
5213     case USE_ADDRESS:
5214       return determine_group_iv_cost_address (data, group, cand);
5215
5216     case USE_COMPARE:
5217       return determine_group_iv_cost_cond (data, group, cand);
5218
5219     default:
5220       gcc_unreachable ();
5221     }
5222 }
5223
5224 /* Return true if get_computation_cost indicates that autoincrement is
5225    a possibility for the pair of USE and CAND, false otherwise.  */
5226
5227 static bool
5228 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5229                            struct iv_cand *cand)
5230 {
5231   if (use->type != USE_ADDRESS)
5232     return false;
5233
5234   bool can_autoinc = false;
5235   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5236   return can_autoinc;
5237 }
5238
5239 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5240    use that allows autoincrement, and set their AINC_USE if possible.  */
5241
5242 static void
5243 set_autoinc_for_original_candidates (struct ivopts_data *data)
5244 {
5245   unsigned i, j;
5246
5247   for (i = 0; i < data->vcands.length (); i++)
5248     {
5249       struct iv_cand *cand = data->vcands[i];
5250       struct iv_use *closest_before = NULL;
5251       struct iv_use *closest_after = NULL;
5252       if (cand->pos != IP_ORIGINAL)
5253         continue;
5254
5255       for (j = 0; j < data->vgroups.length (); j++)
5256         {
5257           struct iv_group *group = data->vgroups[j];
5258           struct iv_use *use = group->vuses[0];
5259           unsigned uid = gimple_uid (use->stmt);
5260
5261           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5262             continue;
5263
5264           if (uid < gimple_uid (cand->incremented_at)
5265               && (closest_before == NULL
5266                   || uid > gimple_uid (closest_before->stmt)))
5267             closest_before = use;
5268
5269           if (uid > gimple_uid (cand->incremented_at)
5270               && (closest_after == NULL
5271                   || uid < gimple_uid (closest_after->stmt)))
5272             closest_after = use;
5273         }
5274
5275       if (closest_before != NULL
5276           && autoinc_possible_for_pair (data, closest_before, cand))
5277         cand->ainc_use = closest_before;
5278       else if (closest_after != NULL
5279                && autoinc_possible_for_pair (data, closest_after, cand))
5280         cand->ainc_use = closest_after;
5281     }
5282 }
5283
5284 /* Relate compare use with all candidates.  */
5285
5286 static void
5287 relate_compare_use_with_all_cands (struct ivopts_data *data)
5288 {
5289   unsigned i, count = data->vcands.length ();
5290   for (i = 0; i < data->vgroups.length (); i++)
5291     {
5292       struct iv_group *group = data->vgroups[i];
5293
5294       if (group->type == USE_COMPARE)
5295         bitmap_set_range (group->related_cands, 0, count);
5296     }
5297 }
5298
5299 /* Finds the candidates for the induction variables.  */
5300
5301 static void
5302 find_iv_candidates (struct ivopts_data *data)
5303 {
5304   /* Add commonly used ivs.  */
5305   add_standard_iv_candidates (data);
5306
5307   /* Add old induction variables.  */
5308   add_iv_candidate_for_bivs (data);
5309
5310   /* Add induction variables derived from uses.  */
5311   add_iv_candidate_for_groups (data);
5312
5313   set_autoinc_for_original_candidates (data);
5314
5315   /* Record the important candidates.  */
5316   record_important_candidates (data);
5317
5318   /* Relate compare iv_use with all candidates.  */
5319   if (!data->consider_all_candidates)
5320     relate_compare_use_with_all_cands (data);
5321
5322   if (dump_file && (dump_flags & TDF_DETAILS))
5323     {
5324       unsigned i;
5325
5326       fprintf (dump_file, "\n<Important Candidates>:\t");
5327       for (i = 0; i < data->vcands.length (); i++)
5328         if (data->vcands[i]->important)
5329           fprintf (dump_file, " %d,", data->vcands[i]->id);
5330       fprintf (dump_file, "\n");
5331
5332       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5333       for (i = 0; i < data->vgroups.length (); i++)
5334         {
5335           struct iv_group *group = data->vgroups[i];
5336
5337           if (group->related_cands)
5338             {
5339               fprintf (dump_file, "  Group %d:\t", group->id);
5340               dump_bitmap (dump_file, group->related_cands);
5341             }
5342         }
5343       fprintf (dump_file, "\n");
5344     }
5345 }
5346
5347 /* Determines costs of computing use of iv with an iv candidate.  */
5348
5349 static void
5350 determine_group_iv_costs (struct ivopts_data *data)
5351 {
5352   unsigned i, j;
5353   struct iv_cand *cand;
5354   struct iv_group *group;
5355   bitmap to_clear = BITMAP_ALLOC (NULL);
5356
5357   alloc_use_cost_map (data);
5358
5359   for (i = 0; i < data->vgroups.length (); i++)
5360     {
5361       group = data->vgroups[i];
5362
5363       if (data->consider_all_candidates)
5364         {
5365           for (j = 0; j < data->vcands.length (); j++)
5366             {
5367               cand = data->vcands[j];
5368               determine_group_iv_cost (data, group, cand);
5369             }
5370         }
5371       else
5372         {
5373           bitmap_iterator bi;
5374
5375           EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5376             {
5377               cand = data->vcands[j];
5378               if (!determine_group_iv_cost (data, group, cand))
5379                 bitmap_set_bit (to_clear, j);
5380             }
5381
5382           /* Remove the candidates for that the cost is infinite from
5383              the list of related candidates.  */
5384           bitmap_and_compl_into (group->related_cands, to_clear);
5385           bitmap_clear (to_clear);
5386         }
5387     }
5388
5389   BITMAP_FREE (to_clear);
5390
5391   if (dump_file && (dump_flags & TDF_DETAILS))
5392     {
5393       bitmap_iterator bi;
5394
5395       /* Dump invariant variables.  */
5396       fprintf (dump_file, "\n<Invariant Vars>:\n");
5397       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5398         {
5399           struct version_info *info = ver_info (data, i);
5400           if (info->inv_id)
5401             {
5402               fprintf (dump_file, "Inv %d:\t", info->inv_id);
5403               print_generic_expr (dump_file, info->name, TDF_SLIM);
5404               fprintf (dump_file, "%s\n",
5405                        info->has_nonlin_use ? "" : "\t(eliminable)");
5406             }
5407         }
5408
5409       /* Dump invariant expressions.  */
5410       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5411       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5412
5413       for (hash_table<iv_inv_expr_hasher>::iterator it
5414            = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5415            ++it)
5416         list.safe_push (*it);
5417
5418       list.qsort (sort_iv_inv_expr_ent);
5419
5420       for (i = 0; i < list.length (); ++i)
5421         {
5422           fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5423           print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5424           fprintf (dump_file, "\n");
5425         }
5426
5427       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5428
5429       for (i = 0; i < data->vgroups.length (); i++)
5430         {
5431           group = data->vgroups[i];
5432
5433           fprintf (dump_file, "Group %d:\n", i);
5434           fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5435           for (j = 0; j < group->n_map_members; j++)
5436             {
5437               if (!group->cost_map[j].cand
5438                   || group->cost_map[j].cost.infinite_cost_p ())
5439                 continue;
5440
5441               fprintf (dump_file, "  %d\t%d\t%d\t",
5442                        group->cost_map[j].cand->id,
5443                        group->cost_map[j].cost.cost,
5444                        group->cost_map[j].cost.complexity);
5445               if (!group->cost_map[j].inv_exprs
5446                   || bitmap_empty_p (group->cost_map[j].inv_exprs))
5447                 fprintf (dump_file, "NIL;\t");
5448               else
5449                 bitmap_print (dump_file,
5450                               group->cost_map[j].inv_exprs, "", ";\t");
5451               if (!group->cost_map[j].inv_vars
5452                   || bitmap_empty_p (group->cost_map[j].inv_vars))
5453                 fprintf (dump_file, "NIL;\n");
5454               else
5455                 bitmap_print (dump_file,
5456                               group->cost_map[j].inv_vars, "", "\n");
5457             }
5458
5459           fprintf (dump_file, "\n");
5460         }
5461       fprintf (dump_file, "\n");
5462     }
5463 }
5464
5465 /* Determines cost of the candidate CAND.  */
5466
5467 static void
5468 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5469 {
5470   comp_cost cost_base;
5471   unsigned cost, cost_step;
5472   tree base;
5473
5474   gcc_assert (cand->iv != NULL);
5475
5476   /* There are two costs associated with the candidate -- its increment
5477      and its initialization.  The second is almost negligible for any loop
5478      that rolls enough, so we take it just very little into account.  */
5479
5480   base = cand->iv->base;
5481   cost_base = force_var_cost (data, base, NULL);
5482   /* It will be exceptional that the iv register happens to be initialized with
5483      the proper value at no cost.  In general, there will at least be a regcopy
5484      or a const set.  */
5485   if (cost_base.cost == 0)
5486     cost_base.cost = COSTS_N_INSNS (1);
5487   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5488
5489   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5490
5491   /* Prefer the original ivs unless we may gain something by replacing it.
5492      The reason is to make debugging simpler; so this is not relevant for
5493      artificial ivs created by other optimization passes.  */
5494   if (cand->pos != IP_ORIGINAL
5495       || !SSA_NAME_VAR (cand->var_before)
5496       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5497     cost++;
5498
5499   /* Prefer not to insert statements into latch unless there are some
5500      already (so that we do not create unnecessary jumps).  */
5501   if (cand->pos == IP_END
5502       && empty_block_p (ip_end_pos (data->current_loop)))
5503     cost++;
5504
5505   cand->cost = cost;
5506   cand->cost_step = cost_step;
5507 }
5508
5509 /* Determines costs of computation of the candidates.  */
5510
5511 static void
5512 determine_iv_costs (struct ivopts_data *data)
5513 {
5514   unsigned i;
5515
5516   if (dump_file && (dump_flags & TDF_DETAILS))
5517     {
5518       fprintf (dump_file, "<Candidate Costs>:\n");
5519       fprintf (dump_file, "  cand\tcost\n");
5520     }
5521
5522   for (i = 0; i < data->vcands.length (); i++)
5523     {
5524       struct iv_cand *cand = data->vcands[i];
5525
5526       determine_iv_cost (data, cand);
5527
5528       if (dump_file && (dump_flags & TDF_DETAILS))
5529         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5530     }
5531
5532   if (dump_file && (dump_flags & TDF_DETAILS))
5533     fprintf (dump_file, "\n");
5534 }
5535
5536 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5537    induction variables.  Note N_INVS includes both invariant variables and
5538    invariant expressions.  */
5539
5540 static unsigned
5541 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5542                               unsigned n_cands)
5543 {
5544   unsigned cost;
5545   unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5546   unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5547   bool speed = data->speed;
5548
5549   /* If there is a call in the loop body, the call-clobbered registers
5550      are not available for loop invariants.  */
5551   if (data->body_includes_call)
5552     available_regs = available_regs - target_clobbered_regs;
5553
5554   /* If we have enough registers.  */
5555   if (regs_needed + target_res_regs < available_regs)
5556     cost = n_new;
5557   /* If close to running out of registers, try to preserve them.  */
5558   else if (regs_needed <= available_regs)
5559     cost = target_reg_cost [speed] * regs_needed;
5560   /* If we run out of available registers but the number of candidates
5561      does not, we penalize extra registers using target_spill_cost.  */
5562   else if (n_cands <= available_regs)
5563     cost = target_reg_cost [speed] * available_regs
5564            + target_spill_cost [speed] * (regs_needed - available_regs);
5565   /* If the number of candidates runs out available registers, we penalize
5566      extra candidate registers using target_spill_cost * 2.  Because it is
5567      more expensive to spill induction variable than invariant.  */
5568   else
5569     cost = target_reg_cost [speed] * available_regs
5570            + target_spill_cost [speed] * (n_cands - available_regs) * 2
5571            + target_spill_cost [speed] * (regs_needed - n_cands);
5572
5573   /* Finally, add the number of candidates, so that we prefer eliminating
5574      induction variables if possible.  */
5575   return cost + n_cands;
5576 }
5577
5578 /* For each size of the induction variable set determine the penalty.  */
5579
5580 static void
5581 determine_set_costs (struct ivopts_data *data)
5582 {
5583   unsigned j, n;
5584   gphi *phi;
5585   gphi_iterator psi;
5586   tree op;
5587   struct loop *loop = data->current_loop;
5588   bitmap_iterator bi;
5589
5590   if (dump_file && (dump_flags & TDF_DETAILS))
5591     {
5592       fprintf (dump_file, "<Global Costs>:\n");
5593       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5594       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5595       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5596       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5597     }
5598
5599   n = 0;
5600   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5601     {
5602       phi = psi.phi ();
5603       op = PHI_RESULT (phi);
5604
5605       if (virtual_operand_p (op))
5606         continue;
5607
5608       if (get_iv (data, op))
5609         continue;
5610
5611       if (!POINTER_TYPE_P (TREE_TYPE (op))
5612           && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5613         continue;
5614
5615       n++;
5616     }
5617
5618   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5619     {
5620       struct version_info *info = ver_info (data, j);
5621
5622       if (info->inv_id && info->has_nonlin_use)
5623         n++;
5624     }
5625
5626   data->regs_used = n;
5627   if (dump_file && (dump_flags & TDF_DETAILS))
5628     fprintf (dump_file, "  regs_used %d\n", n);
5629
5630   if (dump_file && (dump_flags & TDF_DETAILS))
5631     {
5632       fprintf (dump_file, "  cost for size:\n");
5633       fprintf (dump_file, "  ivs\tcost\n");
5634       for (j = 0; j <= 2 * target_avail_regs; j++)
5635         fprintf (dump_file, "  %d\t%d\n", j,
5636                  ivopts_estimate_reg_pressure (data, 0, j));
5637       fprintf (dump_file, "\n");
5638     }
5639 }
5640
5641 /* Returns true if A is a cheaper cost pair than B.  */
5642
5643 static bool
5644 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5645 {
5646   if (!a)
5647     return false;
5648
5649   if (!b)
5650     return true;
5651
5652   if (a->cost < b->cost)
5653     return true;
5654
5655   if (b->cost < a->cost)
5656     return false;
5657
5658   /* In case the costs are the same, prefer the cheaper candidate.  */
5659   if (a->cand->cost < b->cand->cost)
5660     return true;
5661
5662   return false;
5663 }
5664
5665 /* Compare if A is a more expensive cost pair than B.  Return 1, 0 and -1
5666    for more expensive, equal and cheaper respectively.  */
5667
5668 static int
5669 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5670 {
5671   if (cheaper_cost_pair (a, b))
5672     return -1;
5673   if (cheaper_cost_pair (b, a))
5674     return 1;
5675
5676   return 0;
5677 }
5678
5679 /* Returns candidate by that USE is expressed in IVS.  */
5680
5681 static struct cost_pair *
5682 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5683 {
5684   return ivs->cand_for_group[group->id];
5685 }
5686
5687 /* Computes the cost field of IVS structure.  */
5688
5689 static void
5690 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5691 {
5692   comp_cost cost = ivs->cand_use_cost;
5693
5694   cost += ivs->cand_cost;
5695   cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5696   ivs->cost = cost;
5697 }
5698
5699 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
5700    and IVS.  */
5701
5702 static void
5703 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5704 {
5705   bitmap_iterator bi;
5706   unsigned iid;
5707
5708   if (!invs)
5709     return;
5710
5711   gcc_assert (n_inv_uses != NULL);
5712   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5713     {
5714       n_inv_uses[iid]--;
5715       if (n_inv_uses[iid] == 0)
5716         ivs->n_invs--;
5717     }
5718 }
5719
5720 /* Set USE not to be expressed by any candidate in IVS.  */
5721
5722 static void
5723 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5724                  struct iv_group *group)
5725 {
5726   unsigned gid = group->id, cid;
5727   struct cost_pair *cp;
5728
5729   cp = ivs->cand_for_group[gid];
5730   if (!cp)
5731     return;
5732   cid = cp->cand->id;
5733
5734   ivs->bad_groups++;
5735   ivs->cand_for_group[gid] = NULL;
5736   ivs->n_cand_uses[cid]--;
5737
5738   if (ivs->n_cand_uses[cid] == 0)
5739     {
5740       bitmap_clear_bit (ivs->cands, cid);
5741       ivs->n_cands--;
5742       ivs->cand_cost -= cp->cand->cost;
5743       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5744       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5745     }
5746
5747   ivs->cand_use_cost -= cp->cost;
5748   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5749   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5750   iv_ca_recount_cost (data, ivs);
5751 }
5752
5753 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
5754    IVS.  */
5755
5756 static void
5757 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5758 {
5759   bitmap_iterator bi;
5760   unsigned iid;
5761
5762   if (!invs)
5763     return;
5764
5765   gcc_assert (n_inv_uses != NULL);
5766   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5767     {
5768       n_inv_uses[iid]++;
5769       if (n_inv_uses[iid] == 1)
5770         ivs->n_invs++;
5771     }
5772 }
5773
5774 /* Set cost pair for GROUP in set IVS to CP.  */
5775
5776 static void
5777 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5778               struct iv_group *group, struct cost_pair *cp)
5779 {
5780   unsigned gid = group->id, cid;
5781
5782   if (ivs->cand_for_group[gid] == cp)
5783     return;
5784
5785   if (ivs->cand_for_group[gid])
5786     iv_ca_set_no_cp (data, ivs, group);
5787
5788   if (cp)
5789     {
5790       cid = cp->cand->id;
5791
5792       ivs->bad_groups--;
5793       ivs->cand_for_group[gid] = cp;
5794       ivs->n_cand_uses[cid]++;
5795       if (ivs->n_cand_uses[cid] == 1)
5796         {
5797           bitmap_set_bit (ivs->cands, cid);
5798           ivs->n_cands++;
5799           ivs->cand_cost += cp->cand->cost;
5800           iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5801           iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5802         }
5803
5804       ivs->cand_use_cost += cp->cost;
5805       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5806       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5807       iv_ca_recount_cost (data, ivs);
5808     }
5809 }
5810
5811 /* Extend set IVS by expressing USE by some of the candidates in it
5812    if possible.  Consider all important candidates if candidates in
5813    set IVS don't give any result.  */
5814
5815 static void
5816 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5817                struct iv_group *group)
5818 {
5819   struct cost_pair *best_cp = NULL, *cp;
5820   bitmap_iterator bi;
5821   unsigned i;
5822   struct iv_cand *cand;
5823
5824   gcc_assert (ivs->upto >= group->id);
5825   ivs->upto++;
5826   ivs->bad_groups++;
5827
5828   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5829     {
5830       cand = data->vcands[i];
5831       cp = get_group_iv_cost (data, group, cand);
5832       if (cheaper_cost_pair (cp, best_cp))
5833         best_cp = cp;
5834     }
5835
5836   if (best_cp == NULL)
5837     {
5838       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5839         {
5840           cand = data->vcands[i];
5841           cp = get_group_iv_cost (data, group, cand);
5842           if (cheaper_cost_pair (cp, best_cp))
5843             best_cp = cp;
5844         }
5845     }
5846
5847   iv_ca_set_cp (data, ivs, group, best_cp);
5848 }
5849
5850 /* Get cost for assignment IVS.  */
5851
5852 static comp_cost
5853 iv_ca_cost (struct iv_ca *ivs)
5854 {
5855   /* This was a conditional expression but it triggered a bug in
5856      Sun C 5.5.  */
5857   if (ivs->bad_groups)
5858     return infinite_cost;
5859   else
5860     return ivs->cost;
5861 }
5862
5863 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5864    than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
5865    respectively.  */
5866
5867 static int
5868 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5869                     struct iv_group *group, struct cost_pair *old_cp,
5870                     struct cost_pair *new_cp)
5871 {
5872   gcc_assert (old_cp && new_cp && old_cp != new_cp);
5873   unsigned old_n_invs = ivs->n_invs;
5874   iv_ca_set_cp (data, ivs, group, new_cp);
5875   unsigned new_n_invs = ivs->n_invs;
5876   iv_ca_set_cp (data, ivs, group, old_cp);
5877
5878   return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5879 }
5880
5881 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
5882    it before NEXT.  */
5883
5884 static struct iv_ca_delta *
5885 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5886                  struct cost_pair *new_cp, struct iv_ca_delta *next)
5887 {
5888   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5889
5890   change->group = group;
5891   change->old_cp = old_cp;
5892   change->new_cp = new_cp;
5893   change->next = next;
5894
5895   return change;
5896 }
5897
5898 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5899    are rewritten.  */
5900
5901 static struct iv_ca_delta *
5902 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5903 {
5904   struct iv_ca_delta *last;
5905
5906   if (!l2)
5907     return l1;
5908
5909   if (!l1)
5910     return l2;
5911
5912   for (last = l1; last->next; last = last->next)
5913     continue;
5914   last->next = l2;
5915
5916   return l1;
5917 }
5918
5919 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5920
5921 static struct iv_ca_delta *
5922 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5923 {
5924   struct iv_ca_delta *act, *next, *prev = NULL;
5925
5926   for (act = delta; act; act = next)
5927     {
5928       next = act->next;
5929       act->next = prev;
5930       prev = act;
5931
5932       std::swap (act->old_cp, act->new_cp);
5933     }
5934
5935   return prev;
5936 }
5937
5938 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5939    reverted instead.  */
5940
5941 static void
5942 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5943                     struct iv_ca_delta *delta, bool forward)
5944 {
5945   struct cost_pair *from, *to;
5946   struct iv_ca_delta *act;
5947
5948   if (!forward)
5949     delta = iv_ca_delta_reverse (delta);
5950
5951   for (act = delta; act; act = act->next)
5952     {
5953       from = act->old_cp;
5954       to = act->new_cp;
5955       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
5956       iv_ca_set_cp (data, ivs, act->group, to);
5957     }
5958
5959   if (!forward)
5960     iv_ca_delta_reverse (delta);
5961 }
5962
5963 /* Returns true if CAND is used in IVS.  */
5964
5965 static bool
5966 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5967 {
5968   return ivs->n_cand_uses[cand->id] > 0;
5969 }
5970
5971 /* Returns number of induction variable candidates in the set IVS.  */
5972
5973 static unsigned
5974 iv_ca_n_cands (struct iv_ca *ivs)
5975 {
5976   return ivs->n_cands;
5977 }
5978
5979 /* Free the list of changes DELTA.  */
5980
5981 static void
5982 iv_ca_delta_free (struct iv_ca_delta **delta)
5983 {
5984   struct iv_ca_delta *act, *next;
5985
5986   for (act = *delta; act; act = next)
5987     {
5988       next = act->next;
5989       free (act);
5990     }
5991
5992   *delta = NULL;
5993 }
5994
5995 /* Allocates new iv candidates assignment.  */
5996
5997 static struct iv_ca *
5998 iv_ca_new (struct ivopts_data *data)
5999 {
6000   struct iv_ca *nw = XNEW (struct iv_ca);
6001
6002   nw->upto = 0;
6003   nw->bad_groups = 0;
6004   nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6005                                  data->vgroups.length ());
6006   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6007   nw->cands = BITMAP_ALLOC (NULL);
6008   nw->n_cands = 0;
6009   nw->n_invs = 0;
6010   nw->cand_use_cost = no_cost;
6011   nw->cand_cost = 0;
6012   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6013   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6014   nw->cost = no_cost;
6015
6016   return nw;
6017 }
6018
6019 /* Free memory occupied by the set IVS.  */
6020
6021 static void
6022 iv_ca_free (struct iv_ca **ivs)
6023 {
6024   free ((*ivs)->cand_for_group);
6025   free ((*ivs)->n_cand_uses);
6026   BITMAP_FREE ((*ivs)->cands);
6027   free ((*ivs)->n_inv_var_uses);
6028   free ((*ivs)->n_inv_expr_uses);
6029   free (*ivs);
6030   *ivs = NULL;
6031 }
6032
6033 /* Dumps IVS to FILE.  */
6034
6035 static void
6036 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6037 {
6038   unsigned i;
6039   comp_cost cost = iv_ca_cost (ivs);
6040
6041   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
6042            cost.complexity);
6043   fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
6044            ivs->cand_cost, ivs->cand_use_cost.cost,
6045            ivs->cand_use_cost.complexity);
6046   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6047
6048   for (i = 0; i < ivs->upto; i++)
6049     {
6050       struct iv_group *group = data->vgroups[i];
6051       struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6052       if (cp)
6053         fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6054                  group->id, cp->cand->id, cp->cost.cost,
6055                  cp->cost.complexity);
6056       else
6057         fprintf (file, "   group:%d --> ??\n", group->id);
6058     }
6059
6060   const char *pref = "";
6061   fprintf (file, "  invariant variables: ");
6062   for (i = 1; i <= data->max_inv_var_id; i++)
6063     if (ivs->n_inv_var_uses[i])
6064       {
6065         fprintf (file, "%s%d", pref, i);
6066         pref = ", ";
6067       }
6068
6069   pref = "";
6070   fprintf (file, "\n  invariant expressions: ");
6071   for (i = 1; i <= data->max_inv_expr_id; i++)
6072     if (ivs->n_inv_expr_uses[i])
6073       {
6074         fprintf (file, "%s%d", pref, i);
6075         pref = ", ";
6076       }
6077
6078   fprintf (file, "\n\n");
6079 }
6080
6081 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6082    new set, and store differences in DELTA.  Number of induction variables
6083    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6084    the function will try to find a solution with mimimal iv candidates.  */
6085
6086 static comp_cost
6087 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6088               struct iv_cand *cand, struct iv_ca_delta **delta,
6089               unsigned *n_ivs, bool min_ncand)
6090 {
6091   unsigned i;
6092   comp_cost cost;
6093   struct iv_group *group;
6094   struct cost_pair *old_cp, *new_cp;
6095
6096   *delta = NULL;
6097   for (i = 0; i < ivs->upto; i++)
6098     {
6099       group = data->vgroups[i];
6100       old_cp = iv_ca_cand_for_group (ivs, group);
6101
6102       if (old_cp
6103           && old_cp->cand == cand)
6104         continue;
6105
6106       new_cp = get_group_iv_cost (data, group, cand);
6107       if (!new_cp)
6108         continue;
6109
6110       if (!min_ncand)
6111         {
6112           int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6113           /* Skip if new_cp depends on more invariants.  */
6114           if (cmp_invs > 0)
6115             continue;
6116
6117           int cmp_cost = compare_cost_pair (new_cp, old_cp);
6118           /* Skip if new_cp is not cheaper.  */
6119           if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6120             continue;
6121         }
6122
6123       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6124     }
6125
6126   iv_ca_delta_commit (data, ivs, *delta, true);
6127   cost = iv_ca_cost (ivs);
6128   if (n_ivs)
6129     *n_ivs = iv_ca_n_cands (ivs);
6130   iv_ca_delta_commit (data, ivs, *delta, false);
6131
6132   return cost;
6133 }
6134
6135 /* Try narrowing set IVS by removing CAND.  Return the cost of
6136    the new set and store the differences in DELTA.  START is
6137    the candidate with which we start narrowing.  */
6138
6139 static comp_cost
6140 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6141               struct iv_cand *cand, struct iv_cand *start,
6142               struct iv_ca_delta **delta)
6143 {
6144   unsigned i, ci;
6145   struct iv_group *group;
6146   struct cost_pair *old_cp, *new_cp, *cp;
6147   bitmap_iterator bi;
6148   struct iv_cand *cnd;
6149   comp_cost cost, best_cost, acost;
6150
6151   *delta = NULL;
6152   for (i = 0; i < data->vgroups.length (); i++)
6153     {
6154       group = data->vgroups[i];
6155
6156       old_cp = iv_ca_cand_for_group (ivs, group);
6157       if (old_cp->cand != cand)
6158         continue;
6159
6160       best_cost = iv_ca_cost (ivs);
6161       /* Start narrowing with START.  */
6162       new_cp = get_group_iv_cost (data, group, start);
6163
6164       if (data->consider_all_candidates)
6165         {
6166           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6167             {
6168               if (ci == cand->id || (start && ci == start->id))
6169                 continue;
6170
6171               cnd = data->vcands[ci];
6172
6173               cp = get_group_iv_cost (data, group, cnd);
6174               if (!cp)
6175                 continue;
6176
6177               iv_ca_set_cp (data, ivs, group, cp);
6178               acost = iv_ca_cost (ivs);
6179
6180               if (acost < best_cost)
6181                 {
6182                   best_cost = acost;
6183                   new_cp = cp;
6184                 }
6185             }
6186         }
6187       else
6188         {
6189           EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6190             {
6191               if (ci == cand->id || (start && ci == start->id))
6192                 continue;
6193
6194               cnd = data->vcands[ci];
6195
6196               cp = get_group_iv_cost (data, group, cnd);
6197               if (!cp)
6198                 continue;
6199
6200               iv_ca_set_cp (data, ivs, group, cp);
6201               acost = iv_ca_cost (ivs);
6202
6203               if (acost < best_cost)
6204                 {
6205                   best_cost = acost;
6206                   new_cp = cp;
6207                 }
6208             }
6209         }
6210       /* Restore to old cp for use.  */
6211       iv_ca_set_cp (data, ivs, group, old_cp);
6212
6213       if (!new_cp)
6214         {
6215           iv_ca_delta_free (delta);
6216           return infinite_cost;
6217         }
6218
6219       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6220     }
6221
6222   iv_ca_delta_commit (data, ivs, *delta, true);
6223   cost = iv_ca_cost (ivs);
6224   iv_ca_delta_commit (data, ivs, *delta, false);
6225
6226   return cost;
6227 }
6228
6229 /* Try optimizing the set of candidates IVS by removing candidates different
6230    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6231    differences in DELTA.  */
6232
6233 static comp_cost
6234 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6235              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6236 {
6237   bitmap_iterator bi;
6238   struct iv_ca_delta *act_delta, *best_delta;
6239   unsigned i;
6240   comp_cost best_cost, acost;
6241   struct iv_cand *cand;
6242
6243   best_delta = NULL;
6244   best_cost = iv_ca_cost (ivs);
6245
6246   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6247     {
6248       cand = data->vcands[i];
6249
6250       if (cand == except_cand)
6251         continue;
6252
6253       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6254
6255       if (acost < best_cost)
6256         {
6257           best_cost = acost;
6258           iv_ca_delta_free (&best_delta);
6259           best_delta = act_delta;
6260         }
6261       else
6262         iv_ca_delta_free (&act_delta);
6263     }
6264
6265   if (!best_delta)
6266     {
6267       *delta = NULL;
6268       return best_cost;
6269     }
6270
6271   /* Recurse to possibly remove other unnecessary ivs.  */
6272   iv_ca_delta_commit (data, ivs, best_delta, true);
6273   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6274   iv_ca_delta_commit (data, ivs, best_delta, false);
6275   *delta = iv_ca_delta_join (best_delta, *delta);
6276   return best_cost;
6277 }
6278
6279 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6280    cheaper local cost for GROUP than BEST_CP.  Return pointer to
6281    the corresponding cost_pair, otherwise just return BEST_CP.  */
6282
6283 static struct cost_pair*
6284 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6285                         unsigned int cand_idx, struct iv_cand *old_cand,
6286                         struct cost_pair *best_cp)
6287 {
6288   struct iv_cand *cand;
6289   struct cost_pair *cp;
6290
6291   gcc_assert (old_cand != NULL && best_cp != NULL);
6292   if (cand_idx == old_cand->id)
6293     return best_cp;
6294
6295   cand = data->vcands[cand_idx];
6296   cp = get_group_iv_cost (data, group, cand);
6297   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6298     return cp;
6299
6300   return best_cp;
6301 }
6302
6303 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6304    which are used by more than one iv uses.  For each of those candidates,
6305    this function tries to represent iv uses under that candidate using
6306    other ones with lower local cost, then tries to prune the new set.
6307    If the new set has lower cost, It returns the new cost after recording
6308    candidate replacement in list DELTA.  */
6309
6310 static comp_cost
6311 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6312                struct iv_ca_delta **delta)
6313 {
6314   bitmap_iterator bi, bj;
6315   unsigned int i, j, k;
6316   struct iv_cand *cand;
6317   comp_cost orig_cost, acost;
6318   struct iv_ca_delta *act_delta, *tmp_delta;
6319   struct cost_pair *old_cp, *best_cp = NULL;
6320
6321   *delta = NULL;
6322   orig_cost = iv_ca_cost (ivs);
6323
6324   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6325     {
6326       if (ivs->n_cand_uses[i] == 1
6327           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6328         continue;
6329
6330       cand = data->vcands[i];
6331
6332       act_delta = NULL;
6333       /*  Represent uses under current candidate using other ones with
6334           lower local cost.  */
6335       for (j = 0; j < ivs->upto; j++)
6336         {
6337           struct iv_group *group = data->vgroups[j];
6338           old_cp = iv_ca_cand_for_group (ivs, group);
6339
6340           if (old_cp->cand != cand)
6341             continue;
6342
6343           best_cp = old_cp;
6344           if (data->consider_all_candidates)
6345             for (k = 0; k < data->vcands.length (); k++)
6346               best_cp = cheaper_cost_with_cand (data, group, k,
6347                                                 old_cp->cand, best_cp);
6348           else
6349             EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6350               best_cp = cheaper_cost_with_cand (data, group, k,
6351                                                 old_cp->cand, best_cp);
6352
6353           if (best_cp == old_cp)
6354             continue;
6355
6356           act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6357         }
6358       /* No need for further prune.  */
6359       if (!act_delta)
6360         continue;
6361
6362       /* Prune the new candidate set.  */
6363       iv_ca_delta_commit (data, ivs, act_delta, true);
6364       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6365       iv_ca_delta_commit (data, ivs, act_delta, false);
6366       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6367
6368       if (acost < orig_cost)
6369         {
6370           *delta = act_delta;
6371           return acost;
6372         }
6373       else
6374         iv_ca_delta_free (&act_delta);
6375     }
6376
6377   return orig_cost;
6378 }
6379
6380 /* Tries to extend the sets IVS in the best possible way in order to
6381    express the GROUP.  If ORIGINALP is true, prefer candidates from
6382    the original set of IVs, otherwise favor important candidates not
6383    based on any memory object.  */
6384
6385 static bool
6386 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6387                   struct iv_group *group, bool originalp)
6388 {
6389   comp_cost best_cost, act_cost;
6390   unsigned i;
6391   bitmap_iterator bi;
6392   struct iv_cand *cand;
6393   struct iv_ca_delta *best_delta = NULL, *act_delta;
6394   struct cost_pair *cp;
6395
6396   iv_ca_add_group (data, ivs, group);
6397   best_cost = iv_ca_cost (ivs);
6398   cp = iv_ca_cand_for_group (ivs, group);
6399   if (cp)
6400     {
6401       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6402       iv_ca_set_no_cp (data, ivs, group);
6403     }
6404
6405   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6406      first try important candidates not based on any memory object.  Only if
6407      this fails, try the specific ones.  Rationale -- in loops with many
6408      variables the best choice often is to use just one generic biv.  If we
6409      added here many ivs specific to the uses, the optimization algorithm later
6410      would be likely to get stuck in a local minimum, thus causing us to create
6411      too many ivs.  The approach from few ivs to more seems more likely to be
6412      successful -- starting from few ivs, replacing an expensive use by a
6413      specific iv should always be a win.  */
6414   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6415     {
6416       cand = data->vcands[i];
6417
6418       if (originalp && cand->pos !=IP_ORIGINAL)
6419         continue;
6420
6421       if (!originalp && cand->iv->base_object != NULL_TREE)
6422         continue;
6423
6424       if (iv_ca_cand_used_p (ivs, cand))
6425         continue;
6426
6427       cp = get_group_iv_cost (data, group, cand);
6428       if (!cp)
6429         continue;
6430
6431       iv_ca_set_cp (data, ivs, group, cp);
6432       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6433                                true);
6434       iv_ca_set_no_cp (data, ivs, group);
6435       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6436
6437       if (act_cost < best_cost)
6438         {
6439           best_cost = act_cost;
6440
6441           iv_ca_delta_free (&best_delta);
6442           best_delta = act_delta;
6443         }
6444       else
6445         iv_ca_delta_free (&act_delta);
6446     }
6447
6448   if (best_cost.infinite_cost_p ())
6449     {
6450       for (i = 0; i < group->n_map_members; i++)
6451         {
6452           cp = group->cost_map + i;
6453           cand = cp->cand;
6454           if (!cand)
6455             continue;
6456
6457           /* Already tried this.  */
6458           if (cand->important)
6459             {
6460               if (originalp && cand->pos == IP_ORIGINAL)
6461                 continue;
6462               if (!originalp && cand->iv->base_object == NULL_TREE)
6463                 continue;
6464             }
6465
6466           if (iv_ca_cand_used_p (ivs, cand))
6467             continue;
6468
6469           act_delta = NULL;
6470           iv_ca_set_cp (data, ivs, group, cp);
6471           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6472           iv_ca_set_no_cp (data, ivs, group);
6473           act_delta = iv_ca_delta_add (group,
6474                                        iv_ca_cand_for_group (ivs, group),
6475                                        cp, act_delta);
6476
6477           if (act_cost < best_cost)
6478             {
6479               best_cost = act_cost;
6480
6481               if (best_delta)
6482                 iv_ca_delta_free (&best_delta);
6483               best_delta = act_delta;
6484             }
6485           else
6486             iv_ca_delta_free (&act_delta);
6487         }
6488     }
6489
6490   iv_ca_delta_commit (data, ivs, best_delta, true);
6491   iv_ca_delta_free (&best_delta);
6492
6493   return !best_cost.infinite_cost_p ();
6494 }
6495
6496 /* Finds an initial assignment of candidates to uses.  */
6497
6498 static struct iv_ca *
6499 get_initial_solution (struct ivopts_data *data, bool originalp)
6500 {
6501   unsigned i;
6502   struct iv_ca *ivs = iv_ca_new (data);
6503
6504   for (i = 0; i < data->vgroups.length (); i++)
6505     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6506       {
6507         iv_ca_free (&ivs);
6508         return NULL;
6509       }
6510
6511   return ivs;
6512 }
6513
6514 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6515    points to a bool variable, this function tries to break local
6516    optimal fixed-point by replacing candidates in IVS if it's true.  */
6517
6518 static bool
6519 try_improve_iv_set (struct ivopts_data *data,
6520                     struct iv_ca *ivs, bool *try_replace_p)
6521 {
6522   unsigned i, n_ivs;
6523   comp_cost acost, best_cost = iv_ca_cost (ivs);
6524   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6525   struct iv_cand *cand;
6526
6527   /* Try extending the set of induction variables by one.  */
6528   for (i = 0; i < data->vcands.length (); i++)
6529     {
6530       cand = data->vcands[i];
6531
6532       if (iv_ca_cand_used_p (ivs, cand))
6533         continue;
6534
6535       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6536       if (!act_delta)
6537         continue;
6538
6539       /* If we successfully added the candidate and the set is small enough,
6540          try optimizing it by removing other candidates.  */
6541       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6542         {
6543           iv_ca_delta_commit (data, ivs, act_delta, true);
6544           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6545           iv_ca_delta_commit (data, ivs, act_delta, false);
6546           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6547         }
6548
6549       if (acost < best_cost)
6550         {
6551           best_cost = acost;
6552           iv_ca_delta_free (&best_delta);
6553           best_delta = act_delta;
6554         }
6555       else
6556         iv_ca_delta_free (&act_delta);
6557     }
6558
6559   if (!best_delta)
6560     {
6561       /* Try removing the candidates from the set instead.  */
6562       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6563
6564       if (!best_delta && *try_replace_p)
6565         {
6566           *try_replace_p = false;
6567           /* So far candidate selecting algorithm tends to choose fewer IVs
6568              so that it can handle cases in which loops have many variables
6569              but the best choice is often to use only one general biv.  One
6570              weakness is it can't handle opposite cases, in which different
6571              candidates should be chosen with respect to each use.  To solve
6572              the problem, we replace candidates in a manner described by the
6573              comments of iv_ca_replace, thus give general algorithm a chance
6574              to break local optimal fixed-point in these cases.  */
6575           best_cost = iv_ca_replace (data, ivs, &best_delta);
6576         }
6577
6578       if (!best_delta)
6579         return false;
6580     }
6581
6582   iv_ca_delta_commit (data, ivs, best_delta, true);
6583   gcc_assert (best_cost == iv_ca_cost (ivs));
6584   iv_ca_delta_free (&best_delta);
6585   return true;
6586 }
6587
6588 /* Attempts to find the optimal set of induction variables.  We do simple
6589    greedy heuristic -- we try to replace at most one candidate in the selected
6590    solution and remove the unused ivs while this improves the cost.  */
6591
6592 static struct iv_ca *
6593 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6594 {
6595   struct iv_ca *set;
6596   bool try_replace_p = true;
6597
6598   /* Get the initial solution.  */
6599   set = get_initial_solution (data, originalp);
6600   if (!set)
6601     {
6602       if (dump_file && (dump_flags & TDF_DETAILS))
6603         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6604       return NULL;
6605     }
6606
6607   if (dump_file && (dump_flags & TDF_DETAILS))
6608     {
6609       fprintf (dump_file, "Initial set of candidates:\n");
6610       iv_ca_dump (data, dump_file, set);
6611     }
6612
6613   while (try_improve_iv_set (data, set, &try_replace_p))
6614     {
6615       if (dump_file && (dump_flags & TDF_DETAILS))
6616         {
6617           fprintf (dump_file, "Improved to:\n");
6618           iv_ca_dump (data, dump_file, set);
6619         }
6620     }
6621
6622   return set;
6623 }
6624
6625 static struct iv_ca *
6626 find_optimal_iv_set (struct ivopts_data *data)
6627 {
6628   unsigned i;
6629   comp_cost cost, origcost;
6630   struct iv_ca *set, *origset;
6631
6632   /* Determine the cost based on a strategy that starts with original IVs,
6633      and try again using a strategy that prefers candidates not based
6634      on any IVs.  */
6635   origset = find_optimal_iv_set_1 (data, true);
6636   set = find_optimal_iv_set_1 (data, false);
6637
6638   if (!origset && !set)
6639     return NULL;
6640
6641   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6642   cost = set ? iv_ca_cost (set) : infinite_cost;
6643
6644   if (dump_file && (dump_flags & TDF_DETAILS))
6645     {
6646       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6647                origcost.cost, origcost.complexity);
6648       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6649                cost.cost, cost.complexity);
6650     }
6651
6652   /* Choose the one with the best cost.  */
6653   if (origcost <= cost)
6654     {
6655       if (set)
6656         iv_ca_free (&set);
6657       set = origset;
6658     }
6659   else if (origset)
6660     iv_ca_free (&origset);
6661
6662   for (i = 0; i < data->vgroups.length (); i++)
6663     {
6664       struct iv_group *group = data->vgroups[i];
6665       group->selected = iv_ca_cand_for_group (set, group)->cand;
6666     }
6667
6668   return set;
6669 }
6670
6671 /* Creates a new induction variable corresponding to CAND.  */
6672
6673 static void
6674 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6675 {
6676   gimple_stmt_iterator incr_pos;
6677   tree base;
6678   struct iv_use *use;
6679   struct iv_group *group;
6680   bool after = false;
6681
6682   gcc_assert (cand->iv != NULL);
6683
6684   switch (cand->pos)
6685     {
6686     case IP_NORMAL:
6687       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6688       break;
6689
6690     case IP_END:
6691       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6692       after = true;
6693       break;
6694
6695     case IP_AFTER_USE:
6696       after = true;
6697       /* fall through */
6698     case IP_BEFORE_USE:
6699       incr_pos = gsi_for_stmt (cand->incremented_at);
6700       break;
6701
6702     case IP_ORIGINAL:
6703       /* Mark that the iv is preserved.  */
6704       name_info (data, cand->var_before)->preserve_biv = true;
6705       name_info (data, cand->var_after)->preserve_biv = true;
6706
6707       /* Rewrite the increment so that it uses var_before directly.  */
6708       use = find_interesting_uses_op (data, cand->var_after);
6709       group = data->vgroups[use->group_id];
6710       group->selected = cand;
6711       return;
6712     }
6713
6714   gimple_add_tmp_var (cand->var_before);
6715
6716   base = unshare_expr (cand->iv->base);
6717
6718   create_iv (base, unshare_expr (cand->iv->step),
6719              cand->var_before, data->current_loop,
6720              &incr_pos, after, &cand->var_before, &cand->var_after);
6721 }
6722
6723 /* Creates new induction variables described in SET.  */
6724
6725 static void
6726 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6727 {
6728   unsigned i;
6729   struct iv_cand *cand;
6730   bitmap_iterator bi;
6731
6732   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6733     {
6734       cand = data->vcands[i];
6735       create_new_iv (data, cand);
6736     }
6737
6738   if (dump_file && (dump_flags & TDF_DETAILS))
6739     {
6740       fprintf (dump_file, "Selected IV set for loop %d",
6741                data->current_loop->num);
6742       if (data->loop_loc != UNKNOWN_LOCATION)
6743         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6744                  LOCATION_LINE (data->loop_loc));
6745       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6746                avg_loop_niter (data->current_loop));
6747       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6748       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6749         {
6750           cand = data->vcands[i];
6751           dump_cand (dump_file, cand);
6752         }
6753       fprintf (dump_file, "\n");
6754     }
6755 }
6756
6757 /* Rewrites USE (definition of iv used in a nonlinear expression)
6758    using candidate CAND.  */
6759
6760 static void
6761 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6762                             struct iv_use *use, struct iv_cand *cand)
6763 {
6764   gassign *ass;
6765   gimple_stmt_iterator bsi;
6766   tree comp, type = get_use_type (use), tgt;
6767
6768   /* An important special case -- if we are asked to express value of
6769      the original iv by itself, just exit; there is no need to
6770      introduce a new computation (that might also need casting the
6771      variable to unsigned and back).  */
6772   if (cand->pos == IP_ORIGINAL
6773       && cand->incremented_at == use->stmt)
6774     {
6775       tree op = NULL_TREE;
6776       enum tree_code stmt_code;
6777
6778       gcc_assert (is_gimple_assign (use->stmt));
6779       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6780
6781       /* Check whether we may leave the computation unchanged.
6782          This is the case only if it does not rely on other
6783          computations in the loop -- otherwise, the computation
6784          we rely upon may be removed in remove_unused_ivs,
6785          thus leading to ICE.  */
6786       stmt_code = gimple_assign_rhs_code (use->stmt);
6787       if (stmt_code == PLUS_EXPR
6788           || stmt_code == MINUS_EXPR
6789           || stmt_code == POINTER_PLUS_EXPR)
6790         {
6791           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6792             op = gimple_assign_rhs2 (use->stmt);
6793           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6794             op = gimple_assign_rhs1 (use->stmt);
6795         }
6796
6797       if (op != NULL_TREE)
6798         {
6799           if (expr_invariant_in_loop_p (data->current_loop, op))
6800             return;
6801           if (TREE_CODE (op) == SSA_NAME)
6802             {
6803               struct iv *iv = get_iv (data, op);
6804               if (iv != NULL && integer_zerop (iv->step))
6805                 return;
6806             }
6807         }
6808     }
6809
6810   switch (gimple_code (use->stmt))
6811     {
6812     case GIMPLE_PHI:
6813       tgt = PHI_RESULT (use->stmt);
6814
6815       /* If we should keep the biv, do not replace it.  */
6816       if (name_info (data, tgt)->preserve_biv)
6817         return;
6818
6819       bsi = gsi_after_labels (gimple_bb (use->stmt));
6820       break;
6821
6822     case GIMPLE_ASSIGN:
6823       tgt = gimple_assign_lhs (use->stmt);
6824       bsi = gsi_for_stmt (use->stmt);
6825       break;
6826
6827     default:
6828       gcc_unreachable ();
6829     }
6830
6831   aff_tree aff_inv, aff_var;
6832   if (!get_computation_aff_1 (data->current_loop, use->stmt,
6833                               use, cand, &aff_inv, &aff_var))
6834     gcc_unreachable ();
6835
6836   unshare_aff_combination (&aff_inv);
6837   unshare_aff_combination (&aff_var);
6838   /* Prefer CSE opportunity than loop invariant by adding offset at last
6839      so that iv_uses have different offsets can be CSEed.  */
6840   widest_int offset = aff_inv.offset;
6841   aff_inv.offset = 0;
6842
6843   gimple_seq stmt_list = NULL, seq = NULL;
6844   tree comp_op1 = aff_combination_to_tree (&aff_inv);
6845   tree comp_op2 = aff_combination_to_tree (&aff_var);
6846   gcc_assert (comp_op1 && comp_op2);
6847
6848   comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6849   gimple_seq_add_seq (&stmt_list, seq);
6850   comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6851   gimple_seq_add_seq (&stmt_list, seq);
6852
6853   if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6854     std::swap (comp_op1, comp_op2);
6855
6856   if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6857     {
6858       comp = fold_build_pointer_plus (comp_op1,
6859                                       fold_convert (sizetype, comp_op2));
6860       comp = fold_build_pointer_plus (comp,
6861                                       wide_int_to_tree (sizetype, offset));
6862     }
6863   else
6864     {
6865       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6866                           fold_convert (TREE_TYPE (comp_op1), comp_op2));
6867       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6868                           wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6869     }
6870
6871   comp = fold_convert (type, comp);
6872   if (!valid_gimple_rhs_p (comp)
6873       || (gimple_code (use->stmt) != GIMPLE_PHI
6874           /* We can't allow re-allocating the stmt as it might be pointed
6875              to still.  */
6876           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6877               >= gimple_num_ops (gsi_stmt (bsi)))))
6878     {
6879       comp = force_gimple_operand (comp, &seq, true, NULL);
6880       gimple_seq_add_seq (&stmt_list, seq);
6881       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6882         {
6883           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6884           /* As this isn't a plain copy we have to reset alignment
6885              information.  */
6886           if (SSA_NAME_PTR_INFO (comp))
6887             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6888         }
6889     }
6890
6891   gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
6892   if (gimple_code (use->stmt) == GIMPLE_PHI)
6893     {
6894       ass = gimple_build_assign (tgt, comp);
6895       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6896
6897       bsi = gsi_for_stmt (use->stmt);
6898       remove_phi_node (&bsi, false);
6899     }
6900   else
6901     {
6902       gimple_assign_set_rhs_from_tree (&bsi, comp);
6903       use->stmt = gsi_stmt (bsi);
6904     }
6905 }
6906
6907 /* Performs a peephole optimization to reorder the iv update statement with
6908    a mem ref to enable instruction combining in later phases. The mem ref uses
6909    the iv value before the update, so the reordering transformation requires
6910    adjustment of the offset. CAND is the selected IV_CAND.
6911
6912    Example:
6913
6914    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6915    iv2 = iv1 + 1;
6916
6917    if (t < val)      (1)
6918      goto L;
6919    goto Head;
6920
6921
6922    directly propagating t over to (1) will introduce overlapping live range
6923    thus increase register pressure. This peephole transform it into:
6924
6925
6926    iv2 = iv1 + 1;
6927    t = MEM_REF (base, iv2, 8, 8);
6928    if (t < val)
6929      goto L;
6930    goto Head;
6931 */
6932
6933 static void
6934 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6935 {
6936   tree var_after;
6937   gimple *iv_update, *stmt;
6938   basic_block bb;
6939   gimple_stmt_iterator gsi, gsi_iv;
6940
6941   if (cand->pos != IP_NORMAL)
6942     return;
6943
6944   var_after = cand->var_after;
6945   iv_update = SSA_NAME_DEF_STMT (var_after);
6946
6947   bb = gimple_bb (iv_update);
6948   gsi = gsi_last_nondebug_bb (bb);
6949   stmt = gsi_stmt (gsi);
6950
6951   /* Only handle conditional statement for now.  */
6952   if (gimple_code (stmt) != GIMPLE_COND)
6953     return;
6954
6955   gsi_prev_nondebug (&gsi);
6956   stmt = gsi_stmt (gsi);
6957   if (stmt != iv_update)
6958     return;
6959
6960   gsi_prev_nondebug (&gsi);
6961   if (gsi_end_p (gsi))
6962     return;
6963
6964   stmt = gsi_stmt (gsi);
6965   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6966     return;
6967
6968   if (stmt != use->stmt)
6969     return;
6970
6971   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6972     return;
6973
6974   if (dump_file && (dump_flags & TDF_DETAILS))
6975     {
6976       fprintf (dump_file, "Reordering \n");
6977       print_gimple_stmt (dump_file, iv_update, 0);
6978       print_gimple_stmt (dump_file, use->stmt, 0);
6979       fprintf (dump_file, "\n");
6980     }
6981
6982   gsi = gsi_for_stmt (use->stmt);
6983   gsi_iv = gsi_for_stmt (iv_update);
6984   gsi_move_before (&gsi_iv, &gsi);
6985
6986   cand->pos = IP_BEFORE_USE;
6987   cand->incremented_at = use->stmt;
6988 }
6989
6990 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6991
6992 static void
6993 rewrite_use_address (struct ivopts_data *data,
6994                      struct iv_use *use, struct iv_cand *cand)
6995 {
6996   aff_tree aff;
6997   bool ok;
6998
6999   adjust_iv_update_pos (cand, use);
7000   ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7001   gcc_assert (ok);
7002   unshare_aff_combination (&aff);
7003
7004   /* To avoid undefined overflow problems, all IV candidates use unsigned
7005      integer types.  The drawback is that this makes it impossible for
7006      create_mem_ref to distinguish an IV that is based on a memory object
7007      from one that represents simply an offset.
7008
7009      To work around this problem, we pass a hint to create_mem_ref that
7010      indicates which variable (if any) in aff is an IV based on a memory
7011      object.  Note that we only consider the candidate.  If this is not
7012      based on an object, the base of the reference is in some subexpression
7013      of the use -- but these will use pointer types, so they are recognized
7014      by the create_mem_ref heuristics anyway.  */
7015   tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7016   tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7017   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7018   tree type = TREE_TYPE (*use->op_p);
7019   unsigned int align = get_object_alignment (*use->op_p);
7020   if (align != TYPE_ALIGN (type))
7021     type = build_aligned_type (type, align);
7022
7023   tree ref = create_mem_ref (&bsi, type, &aff,
7024                              reference_alias_ptr_type (*use->op_p),
7025                              iv, base_hint, data->speed);
7026
7027   copy_ref_info (ref, *use->op_p);
7028   *use->op_p = ref;
7029 }
7030
7031 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7032    candidate CAND.  */
7033
7034 static void
7035 rewrite_use_compare (struct ivopts_data *data,
7036                      struct iv_use *use, struct iv_cand *cand)
7037 {
7038   tree comp, op, bound;
7039   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7040   enum tree_code compare;
7041   struct iv_group *group = data->vgroups[use->group_id];
7042   struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7043
7044   bound = cp->value;
7045   if (bound)
7046     {
7047       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7048       tree var_type = TREE_TYPE (var);
7049       gimple_seq stmts;
7050
7051       if (dump_file && (dump_flags & TDF_DETAILS))
7052         {
7053           fprintf (dump_file, "Replacing exit test: ");
7054           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7055         }
7056       compare = cp->comp;
7057       bound = unshare_expr (fold_convert (var_type, bound));
7058       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7059       if (stmts)
7060         gsi_insert_seq_on_edge_immediate (
7061                 loop_preheader_edge (data->current_loop),
7062                 stmts);
7063
7064       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7065       gimple_cond_set_lhs (cond_stmt, var);
7066       gimple_cond_set_code (cond_stmt, compare);
7067       gimple_cond_set_rhs (cond_stmt, op);
7068       return;
7069     }
7070
7071   /* The induction variable elimination failed; just express the original
7072      giv.  */
7073   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7074   gcc_assert (comp != NULL_TREE);
7075   gcc_assert (use->op_p != NULL);
7076   *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7077                                          SSA_NAME_VAR (*use->op_p),
7078                                          true, GSI_SAME_STMT);
7079 }
7080
7081 /* Rewrite the groups using the selected induction variables.  */
7082
7083 static void
7084 rewrite_groups (struct ivopts_data *data)
7085 {
7086   unsigned i, j;
7087
7088   for (i = 0; i < data->vgroups.length (); i++)
7089     {
7090       struct iv_group *group = data->vgroups[i];
7091       struct iv_cand *cand = group->selected;
7092
7093       gcc_assert (cand);
7094
7095       if (group->type == USE_NONLINEAR_EXPR)
7096         {
7097           for (j = 0; j < group->vuses.length (); j++)
7098             {
7099               rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7100               update_stmt (group->vuses[j]->stmt);
7101             }
7102         }
7103       else if (group->type == USE_ADDRESS)
7104         {
7105           for (j = 0; j < group->vuses.length (); j++)
7106             {
7107               rewrite_use_address (data, group->vuses[j], cand);
7108               update_stmt (group->vuses[j]->stmt);
7109             }
7110         }
7111       else
7112         {
7113           gcc_assert (group->type == USE_COMPARE);
7114
7115           for (j = 0; j < group->vuses.length (); j++)
7116             {
7117               rewrite_use_compare (data, group->vuses[j], cand);
7118               update_stmt (group->vuses[j]->stmt);
7119             }
7120         }
7121     }
7122 }
7123
7124 /* Removes the ivs that are not used after rewriting.  */
7125
7126 static void
7127 remove_unused_ivs (struct ivopts_data *data)
7128 {
7129   unsigned j;
7130   bitmap_iterator bi;
7131   bitmap toremove = BITMAP_ALLOC (NULL);
7132
7133   /* Figure out an order in which to release SSA DEFs so that we don't
7134      release something that we'd have to propagate into a debug stmt
7135      afterwards.  */
7136   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7137     {
7138       struct version_info *info;
7139
7140       info = ver_info (data, j);
7141       if (info->iv
7142           && !integer_zerop (info->iv->step)
7143           && !info->inv_id
7144           && !info->iv->nonlin_use
7145           && !info->preserve_biv)
7146         {
7147           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7148
7149           tree def = info->iv->ssa_name;
7150
7151           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7152             {
7153               imm_use_iterator imm_iter;
7154               use_operand_p use_p;
7155               gimple *stmt;
7156               int count = 0;
7157
7158               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7159                 {
7160                   if (!gimple_debug_bind_p (stmt))
7161                     continue;
7162
7163                   /* We just want to determine whether to do nothing
7164                      (count == 0), to substitute the computed
7165                      expression into a single use of the SSA DEF by
7166                      itself (count == 1), or to use a debug temp
7167                      because the SSA DEF is used multiple times or as
7168                      part of a larger expression (count > 1). */
7169                   count++;
7170                   if (gimple_debug_bind_get_value (stmt) != def)
7171                     count++;
7172
7173                   if (count > 1)
7174                     BREAK_FROM_IMM_USE_STMT (imm_iter);
7175                 }
7176
7177               if (!count)
7178                 continue;
7179
7180               struct iv_use dummy_use;
7181               struct iv_cand *best_cand = NULL, *cand;
7182               unsigned i, best_pref = 0, cand_pref;
7183
7184               memset (&dummy_use, 0, sizeof (dummy_use));
7185               dummy_use.iv = info->iv;
7186               for (i = 0; i < data->vgroups.length () && i < 64; i++)
7187                 {
7188                   cand = data->vgroups[i]->selected;
7189                   if (cand == best_cand)
7190                     continue;
7191                   cand_pref = operand_equal_p (cand->iv->step,
7192                                                info->iv->step, 0)
7193                     ? 4 : 0;
7194                   cand_pref
7195                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7196                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7197                     ? 2 : 0;
7198                   cand_pref
7199                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7200                     ? 1 : 0;
7201                   if (best_cand == NULL || best_pref < cand_pref)
7202                     {
7203                       best_cand = cand;
7204                       best_pref = cand_pref;
7205                     }
7206                 }
7207
7208               if (!best_cand)
7209                 continue;
7210
7211               tree comp = get_computation_at (data->current_loop,
7212                                               SSA_NAME_DEF_STMT (def),
7213                                               &dummy_use, best_cand);
7214               if (!comp)
7215                 continue;
7216
7217               if (count > 1)
7218                 {
7219                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7220                   DECL_ARTIFICIAL (vexpr) = 1;
7221                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7222                   if (SSA_NAME_VAR (def))
7223                     SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7224                   else
7225                     SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7226                   gdebug *def_temp
7227                     = gimple_build_debug_bind (vexpr, comp, NULL);
7228                   gimple_stmt_iterator gsi;
7229
7230                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7231                     gsi = gsi_after_labels (gimple_bb
7232                                             (SSA_NAME_DEF_STMT (def)));
7233                   else
7234                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7235
7236                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7237                   comp = vexpr;
7238                 }
7239
7240               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7241                 {
7242                   if (!gimple_debug_bind_p (stmt))
7243                     continue;
7244
7245                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7246                     SET_USE (use_p, comp);
7247
7248                   update_stmt (stmt);
7249                 }
7250             }
7251         }
7252     }
7253
7254   release_defs_bitset (toremove);
7255
7256   BITMAP_FREE (toremove);
7257 }
7258
7259 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7260    for hash_map::traverse.  */
7261
7262 bool
7263 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7264 {
7265   free (value);
7266   return true;
7267 }
7268
7269 /* Frees data allocated by the optimization of a single loop.  */
7270
7271 static void
7272 free_loop_data (struct ivopts_data *data)
7273 {
7274   unsigned i, j;
7275   bitmap_iterator bi;
7276   tree obj;
7277
7278   if (data->niters)
7279     {
7280       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7281       delete data->niters;
7282       data->niters = NULL;
7283     }
7284
7285   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7286     {
7287       struct version_info *info;
7288
7289       info = ver_info (data, i);
7290       info->iv = NULL;
7291       info->has_nonlin_use = false;
7292       info->preserve_biv = false;
7293       info->inv_id = 0;
7294     }
7295   bitmap_clear (data->relevant);
7296   bitmap_clear (data->important_candidates);
7297
7298   for (i = 0; i < data->vgroups.length (); i++)
7299     {
7300       struct iv_group *group = data->vgroups[i];
7301
7302       for (j = 0; j < group->vuses.length (); j++)
7303         free (group->vuses[j]);
7304       group->vuses.release ();
7305
7306       BITMAP_FREE (group->related_cands);
7307       for (j = 0; j < group->n_map_members; j++)
7308         {
7309           if (group->cost_map[j].inv_vars)
7310             BITMAP_FREE (group->cost_map[j].inv_vars);
7311           if (group->cost_map[j].inv_exprs)
7312             BITMAP_FREE (group->cost_map[j].inv_exprs);
7313         }
7314
7315       free (group->cost_map);
7316       free (group);
7317     }
7318   data->vgroups.truncate (0);
7319
7320   for (i = 0; i < data->vcands.length (); i++)
7321     {
7322       struct iv_cand *cand = data->vcands[i];
7323
7324       if (cand->inv_vars)
7325         BITMAP_FREE (cand->inv_vars);
7326       if (cand->inv_exprs)
7327         BITMAP_FREE (cand->inv_exprs);
7328       free (cand);
7329     }
7330   data->vcands.truncate (0);
7331
7332   if (data->version_info_size < num_ssa_names)
7333     {
7334       data->version_info_size = 2 * num_ssa_names;
7335       free (data->version_info);
7336       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7337     }
7338
7339   data->max_inv_var_id = 0;
7340   data->max_inv_expr_id = 0;
7341
7342   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7343     SET_DECL_RTL (obj, NULL_RTX);
7344
7345   decl_rtl_to_reset.truncate (0);
7346
7347   data->inv_expr_tab->empty ();
7348
7349   data->iv_common_cand_tab->empty ();
7350   data->iv_common_cands.truncate (0);
7351 }
7352
7353 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7354    loop tree.  */
7355
7356 static void
7357 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7358 {
7359   free_loop_data (data);
7360   free (data->version_info);
7361   BITMAP_FREE (data->relevant);
7362   BITMAP_FREE (data->important_candidates);
7363
7364   decl_rtl_to_reset.release ();
7365   data->vgroups.release ();
7366   data->vcands.release ();
7367   delete data->inv_expr_tab;
7368   data->inv_expr_tab = NULL;
7369   free_affine_expand_cache (&data->name_expansion_cache);
7370   delete data->iv_common_cand_tab;
7371   data->iv_common_cand_tab = NULL;
7372   data->iv_common_cands.release ();
7373   obstack_free (&data->iv_obstack, NULL);
7374 }
7375
7376 /* Returns true if the loop body BODY includes any function calls.  */
7377
7378 static bool
7379 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7380 {
7381   gimple_stmt_iterator gsi;
7382   unsigned i;
7383
7384   for (i = 0; i < num_nodes; i++)
7385     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7386       {
7387         gimple *stmt = gsi_stmt (gsi);
7388         if (is_gimple_call (stmt)
7389             && !gimple_call_internal_p (stmt)
7390             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7391           return true;
7392       }
7393   return false;
7394 }
7395
7396 /* Optimizes the LOOP.  Returns true if anything changed.  */
7397
7398 static bool
7399 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7400 {
7401   bool changed = false;
7402   struct iv_ca *iv_ca;
7403   edge exit = single_dom_exit (loop);
7404   basic_block *body;
7405
7406   gcc_assert (!data->niters);
7407   data->current_loop = loop;
7408   data->loop_loc = find_loop_location (loop);
7409   data->speed = optimize_loop_for_speed_p (loop);
7410
7411   if (dump_file && (dump_flags & TDF_DETAILS))
7412     {
7413       fprintf (dump_file, "Processing loop %d", loop->num);
7414       if (data->loop_loc != UNKNOWN_LOCATION)
7415         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7416                  LOCATION_LINE (data->loop_loc));
7417       fprintf (dump_file, "\n");
7418
7419       if (exit)
7420         {
7421           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7422                    exit->src->index, exit->dest->index);
7423           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7424           fprintf (dump_file, "\n");
7425         }
7426
7427       fprintf (dump_file, "\n");
7428     }
7429
7430   body = get_loop_body (loop);
7431   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7432   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7433   free (body);
7434
7435   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7436
7437   /* For each ssa name determines whether it behaves as an induction variable
7438      in some loop.  */
7439   if (!find_induction_variables (data))
7440     goto finish;
7441
7442   /* Finds interesting uses (item 1).  */
7443   find_interesting_uses (data);
7444   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7445     goto finish;
7446
7447   /* Finds candidates for the induction variables (item 2).  */
7448   find_iv_candidates (data);
7449
7450   /* Calculates the costs (item 3, part 1).  */
7451   determine_iv_costs (data);
7452   determine_group_iv_costs (data);
7453   determine_set_costs (data);
7454
7455   /* Find the optimal set of induction variables (item 3, part 2).  */
7456   iv_ca = find_optimal_iv_set (data);
7457   if (!iv_ca)
7458     goto finish;
7459   changed = true;
7460
7461   /* Create the new induction variables (item 4, part 1).  */
7462   create_new_ivs (data, iv_ca);
7463   iv_ca_free (&iv_ca);
7464
7465   /* Rewrite the uses (item 4, part 2).  */
7466   rewrite_groups (data);
7467
7468   /* Remove the ivs that are unused after rewriting.  */
7469   remove_unused_ivs (data);
7470
7471   /* We have changed the structure of induction variables; it might happen
7472      that definitions in the scev database refer to some of them that were
7473      eliminated.  */
7474   scev_reset ();
7475
7476 finish:
7477   free_loop_data (data);
7478
7479   return changed;
7480 }
7481
7482 /* Main entry point.  Optimizes induction variables in loops.  */
7483
7484 void
7485 tree_ssa_iv_optimize (void)
7486 {
7487   struct loop *loop;
7488   struct ivopts_data data;
7489
7490   tree_ssa_iv_optimize_init (&data);
7491
7492   /* Optimize the loops starting with the innermost ones.  */
7493   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7494     {
7495       if (dump_file && (dump_flags & TDF_DETAILS))
7496         flow_loop_dump (loop, dump_file, NULL, 1);
7497
7498       tree_ssa_iv_optimize_loop (&data, loop);
7499     }
7500
7501   tree_ssa_iv_optimize_finalize (&data);
7502 }
7503
7504 #include "gt-tree-ssa-loop-ivopts.h"