gcc/tree-ssa-loop-ivopts.c (official-gcc.git)
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
 33    Note the interesting uses are categorized and handled in groups.
 34    Generally, address type uses are grouped together if their iv bases
 35    differ only by a constant offset.
37 2) Candidates for the induction variables are found. This includes
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
 43    3) The optimal (with respect to a cost function) set of variables is chosen.  The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
 49       The cost reflects the time spent on modifying the induction variable's
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
 64    All of this is done loop by loop.  Doing it globally is theoretically
 65    possible; it might give better performance and enable us to decide
 66    costs more precisely, but getting all the interactions right
 67    would be complicated.  */
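/* As a rough illustration of the effect of the pass (the exact outcome
   depends on the target's address and arithmetic costs), a loop such as

     void
     copy (int *a, int *b, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] = b[i];
     }

   typically ends up using a single pointer induction variable, roughly

     void
     copy (int *a, int *b, int n)
     {
       int *p = a, *q = b, *end = a + n;
       while (p != end)
         *p++ = *q++;
     }

   Here strength reduction replaces the repeated &a[i] and &b[i] address
   computations by pointer increments, and the exit test is rewritten in
   terms of the chosen candidate, eliminating the original counter i.  */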
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "memmodel.h"
79 #include "tm_p.h"
80 #include "ssa.h"
81 #include "expmed.h"
82 #include "insn-config.h"
83 #include "emit-rtl.h"
84 #include "recog.h"
85 #include "cgraph.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
90 #include "tree-eh.h"
91 #include "gimplify.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
94 #include "tree-cfg.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
99 #include "explow.h"
100 #include "expr.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
103 #include "cfgloop.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
113 cost of different addressing modes. This should be moved to a TBD
114 interface between the GIMPLE and RTL worlds. */
116 /* The infinite cost. */
117 #define INFTY 10000000
119 /* Returns the expected number of loop iterations for LOOP.
120 The average trip count is computed from profile data if it
121 exists. */
123 static inline HOST_WIDE_INT
124 avg_loop_niter (struct loop *loop)
126 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
127 if (niter == -1)
129 niter = likely_max_stmt_executions_int (loop);
131 if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
132 return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
135 return niter;
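/* For example, if profile feedback estimates about 4 iterations, 4 is
   returned directly.  Without such an estimate, the likely maximum
   iteration count is used instead, but clamped to the avg-loop-niter
   parameter (10 by default), which is also the fallback when no bound
   is known at all.  (Illustrative numbers; the parameter is tunable.)  */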
138 struct iv_use;
140 /* Representation of the induction variable. */
141 struct iv
143 tree base; /* Initial value of the iv. */
144   tree base_object;	/* A memory object to which the induction variable points.  */
145 tree step; /* Step of the iv (constant only). */
146 tree ssa_name; /* The ssa name with the value. */
147   struct iv_use *nonlin_use;	/* The nonlinear use (USE_NONLINEAR_EXPR) of this iv, if any.  */
148 bool biv_p; /* Is it a biv? */
149 bool no_overflow; /* True if the iv doesn't overflow. */
150 bool have_address_use;/* For biv, indicate if it's used in any address
151 type use. */
154 /* Per-ssa version information (induction variable descriptions, etc.). */
155 struct version_info
157 tree name; /* The ssa name. */
158 struct iv *iv; /* Induction variable description. */
159 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
160 an expression that is not an induction variable. */
161 bool preserve_biv; /* For the original biv, whether to preserve it. */
162 unsigned inv_id; /* Id of an invariant. */
165 /* Types of uses. */
166 enum use_type
168 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
169 USE_ADDRESS, /* Use in an address. */
170 USE_COMPARE /* Use is a compare. */
173 /* Cost of a computation. */
174 struct comp_cost
176 comp_cost (): cost (0), complexity (0), scratch (0)
179 comp_cost (int cost, unsigned complexity, int scratch = 0)
180 : cost (cost), complexity (complexity), scratch (scratch)
183 /* Returns true if COST is infinite. */
184 bool infinite_cost_p ();
186 /* Adds costs COST1 and COST2. */
187 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
189 /* Adds COST to the comp_cost. */
190 comp_cost operator+= (comp_cost cost);
192 /* Adds constant C to this comp_cost. */
193 comp_cost operator+= (HOST_WIDE_INT c);
195   /* Subtracts constant C from this comp_cost.  */
196 comp_cost operator-= (HOST_WIDE_INT c);
198 /* Divide the comp_cost by constant C. */
199 comp_cost operator/= (HOST_WIDE_INT c);
201 /* Multiply the comp_cost by constant C. */
202 comp_cost operator*= (HOST_WIDE_INT c);
204   /* Subtracts cost COST2 from COST1.  */
205 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
207 /* Subtracts COST from this comp_cost. */
208 comp_cost operator-= (comp_cost cost);
210 /* Returns true if COST1 is smaller than COST2. */
211 friend bool operator< (comp_cost cost1, comp_cost cost2);
213 /* Returns true if COST1 and COST2 are equal. */
214 friend bool operator== (comp_cost cost1, comp_cost cost2);
216   /* Returns true if COST1 is smaller than or equal to COST2.  */
217 friend bool operator<= (comp_cost cost1, comp_cost cost2);
219 int cost; /* The runtime cost. */
220 unsigned complexity; /* The estimate of the complexity of the code for
221 the computation (in no concrete units --
222 complexity field should be larger for more
223 complex expressions and addressing modes). */
224 int scratch; /* Scratch used during cost computation. */
227 static const comp_cost no_cost;
228 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
230 bool
231 comp_cost::infinite_cost_p ()
233 return cost == INFTY;
236 comp_cost
237 operator+ (comp_cost cost1, comp_cost cost2)
239 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
240 return infinite_cost;
242 cost1.cost += cost2.cost;
243 cost1.complexity += cost2.complexity;
245 return cost1;
248 comp_cost
249 operator- (comp_cost cost1, comp_cost cost2)
251 if (cost1.infinite_cost_p ())
252 return infinite_cost;
254 gcc_assert (!cost2.infinite_cost_p ());
256 cost1.cost -= cost2.cost;
257 cost1.complexity -= cost2.complexity;
259 return cost1;
262 comp_cost
263 comp_cost::operator+= (comp_cost cost)
265 *this = *this + cost;
266 return *this;
269 comp_cost
270 comp_cost::operator+= (HOST_WIDE_INT c)
272 if (infinite_cost_p ())
273 return *this;
275 this->cost += c;
277 return *this;
280 comp_cost
281 comp_cost::operator-= (HOST_WIDE_INT c)
283 if (infinite_cost_p ())
284 return *this;
286 this->cost -= c;
288 return *this;
291 comp_cost
292 comp_cost::operator/= (HOST_WIDE_INT c)
294 if (infinite_cost_p ())
295 return *this;
297 this->cost /= c;
299 return *this;
302 comp_cost
303 comp_cost::operator*= (HOST_WIDE_INT c)
305 if (infinite_cost_p ())
306 return *this;
308 this->cost *= c;
310 return *this;
313 comp_cost
314 comp_cost::operator-= (comp_cost cost)
316 *this = *this - cost;
317 return *this;
320 bool
321 operator< (comp_cost cost1, comp_cost cost2)
323 if (cost1.cost == cost2.cost)
324 return cost1.complexity < cost2.complexity;
326 return cost1.cost < cost2.cost;
329 bool
330 operator== (comp_cost cost1, comp_cost cost2)
332 return cost1.cost == cost2.cost
333 && cost1.complexity == cost2.complexity;
336 bool
337 operator<= (comp_cost cost1, comp_cost cost2)
339 return cost1 < cost2 || cost1 == cost2;
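/* Illustration of the ordering defined above: costs compare primarily by
   the runtime COST field and use COMPLEXITY only as a tie-breaker, so
   e.g. comp_cost (4, 7) < comp_cost (5, 0) and
   comp_cost (4, 1) < comp_cost (4, 2), while adding anything to
   infinite_cost yields infinite_cost again.  */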
342 struct iv_inv_expr_ent;
344 /* The candidate - cost pair. */
345 struct cost_pair
347 struct iv_cand *cand; /* The candidate. */
348 comp_cost cost; /* The cost. */
349 enum tree_code comp; /* For iv elimination, the comparison. */
350 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
351 preserved when representing iv_use with iv_cand. */
352 bitmap inv_exprs; /* The list of newly created invariant expressions
353 when representing iv_use with iv_cand. */
354 tree value; /* For final value elimination, the expression for
355 the final value of the iv. For iv elimination,
356 the new bound to compare with. */
359 /* Use. */
360 struct iv_use
362 unsigned id; /* The id of the use. */
363 unsigned group_id; /* The group id the use belongs to. */
364 enum use_type type; /* Type of the use. */
365 struct iv *iv; /* The induction variable it is based on. */
366 gimple *stmt; /* Statement in that it occurs. */
367 tree *op_p; /* The place where it occurs. */
369 tree addr_base; /* Base address with const offset stripped. */
370 unsigned HOST_WIDE_INT addr_offset;
371 /* Const offset stripped from base address. */
374 /* Group of uses. */
375 struct iv_group
377 /* The id of the group. */
378 unsigned id;
379 /* Uses of the group are of the same type. */
380 enum use_type type;
381 /* The set of "related" IV candidates, plus the important ones. */
382 bitmap related_cands;
383 /* Number of IV candidates in the cost_map. */
384 unsigned n_map_members;
385   /* The costs with respect to the iv candidates.  */
386 struct cost_pair *cost_map;
387 /* The selected candidate for the group. */
388 struct iv_cand *selected;
389 /* Uses in the group. */
390 vec<struct iv_use *> vuses;
393 /* The position where the iv is computed. */
394 enum iv_position
396 IP_NORMAL, /* At the end, just before the exit condition. */
397 IP_END, /* At the end of the latch block. */
398 IP_BEFORE_USE, /* Immediately before a specific use. */
399 IP_AFTER_USE, /* Immediately after a specific use. */
400 IP_ORIGINAL /* The original biv. */
403 /* The induction variable candidate. */
404 struct iv_cand
406 unsigned id; /* The number of the candidate. */
407 bool important; /* Whether this is an "important" candidate, i.e. such
408 that it should be considered by all uses. */
409 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
410 gimple *incremented_at;/* For original biv, the statement where it is
411 incremented. */
412 tree var_before; /* The variable used for it before increment. */
413 tree var_after; /* The variable used for it after increment. */
414 struct iv *iv; /* The value of the candidate. NULL for
415 "pseudocandidate" used to indicate the possibility
416 to replace the final value of an iv by direct
417 computation of the value. */
418 unsigned cost; /* Cost of the candidate. */
419 unsigned cost_step; /* Cost of the candidate's increment operation. */
420 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
421 where it is incremented. */
422 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
423 iv_cand. */
424 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
425 				   handle it as a new invariant expression which will
426 be hoisted out of loop. */
427 struct iv *orig_iv; /* The original iv if this cand is added from biv with
428 smaller type. */
431 /* Hashtable entry for common candidate derived from iv uses. */
432 struct iv_common_cand
434 tree base;
435 tree step;
436 /* IV uses from which this common candidate is derived. */
437 auto_vec<struct iv_use *> uses;
438 hashval_t hash;
441 /* Hashtable helpers. */
443 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
445 static inline hashval_t hash (const iv_common_cand *);
446 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
449 /* Hash function for possible common candidates. */
451 inline hashval_t
452 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
454 return ccand->hash;
457 /* Hash table equality function for common candidates. */
459 inline bool
460 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
461 const iv_common_cand *ccand2)
463 return (ccand1->hash == ccand2->hash
464 && operand_equal_p (ccand1->base, ccand2->base, 0)
465 && operand_equal_p (ccand1->step, ccand2->step, 0)
466 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
467 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
470 /* Loop invariant expression hashtable entry. */
472 struct iv_inv_expr_ent
474 /* Tree expression of the entry. */
475 tree expr;
476   /* Unique identifier.  */
477 int id;
478 /* Hash value. */
479 hashval_t hash;
482 /* Sort iv_inv_expr_ent pair A and B by id field. */
484 static int
485 sort_iv_inv_expr_ent (const void *a, const void *b)
487 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
488 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
490 unsigned id1 = (*e1)->id;
491 unsigned id2 = (*e2)->id;
493 if (id1 < id2)
494 return -1;
495 else if (id1 > id2)
496 return 1;
497 else
498 return 0;
501 /* Hashtable helpers. */
503 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
505 static inline hashval_t hash (const iv_inv_expr_ent *);
506 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
509 /* Hash function for loop invariant expressions. */
511 inline hashval_t
512 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
514 return expr->hash;
517 /* Hash table equality function for expressions. */
519 inline bool
520 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
521 const iv_inv_expr_ent *expr2)
523 return expr1->hash == expr2->hash
524 && operand_equal_p (expr1->expr, expr2->expr, 0);
527 struct ivopts_data
529 /* The currently optimized loop. */
530 struct loop *current_loop;
531 source_location loop_loc;
533 /* Numbers of iterations for all exits of the current loop. */
534 hash_map<edge, tree_niter_desc *> *niters;
536 /* Number of registers used in it. */
537 unsigned regs_used;
539 /* The size of version_info array allocated. */
540 unsigned version_info_size;
542 /* The array of information for the ssa names. */
543 struct version_info *version_info;
545 /* The hashtable of loop invariant expressions created
546 by ivopt. */
547 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
549 /* The bitmap of indices in version_info whose value was changed. */
550 bitmap relevant;
552 /* The uses of induction variables. */
553 vec<iv_group *> vgroups;
555 /* The candidates. */
556 vec<iv_cand *> vcands;
558 /* A bitmap of important candidates. */
559 bitmap important_candidates;
561 /* Cache used by tree_to_aff_combination_expand. */
562 hash_map<tree, name_expansion *> *name_expansion_cache;
564 /* The hashtable of common candidates derived from iv uses. */
565 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
567 /* The common candidates. */
568 vec<iv_common_cand *> iv_common_cands;
570 /* The maximum invariant variable id. */
571 unsigned max_inv_var_id;
573 /* The maximum invariant expression id. */
574 unsigned max_inv_expr_id;
576 /* Number of no_overflow BIVs which are not used in memory address. */
577 unsigned bivs_not_used_in_addr;
579 /* Obstack for iv structure. */
580 struct obstack iv_obstack;
582 /* Whether to consider just related and important candidates when replacing a
583 use. */
584 bool consider_all_candidates;
586 /* Are we optimizing for speed? */
587 bool speed;
589 /* Whether the loop body includes any function calls. */
590 bool body_includes_call;
592 /* Whether the loop body can only be exited via single exit. */
593 bool loop_single_exit_p;
596 /* An assignment of iv candidates to uses. */
598 struct iv_ca
600 /* The number of uses covered by the assignment. */
601 unsigned upto;
603 /* Number of uses that cannot be expressed by the candidates in the set. */
604 unsigned bad_groups;
606 /* Candidate assigned to a use, together with the related costs. */
607 struct cost_pair **cand_for_group;
609 /* Number of times each candidate is used. */
610 unsigned *n_cand_uses;
612 /* The candidates used. */
613 bitmap cands;
615 /* The number of candidates in the set. */
616 unsigned n_cands;
618   /* The number of invariants needed, including both invariant variables and
619 invariant expressions. */
620 unsigned n_invs;
622 /* Total cost of expressing uses. */
623 comp_cost cand_use_cost;
625 /* Total cost of candidates. */
626 unsigned cand_cost;
628 /* Number of times each invariant variable is used. */
629 unsigned *n_inv_var_uses;
631 /* Number of times each invariant expression is used. */
632 unsigned *n_inv_expr_uses;
634 /* Total cost of the assignment. */
635 comp_cost cost;
638 /* Difference of two iv candidate assignments. */
640 struct iv_ca_delta
642 /* Changed group. */
643 struct iv_group *group;
645 /* An old assignment (for rollback purposes). */
646 struct cost_pair *old_cp;
648 /* A new assignment. */
649 struct cost_pair *new_cp;
651 /* Next change in the list. */
652 struct iv_ca_delta *next;
655 /* Bound on number of candidates below which all candidates are considered.  */
657 #define CONSIDER_ALL_CANDIDATES_BOUND \
658 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
660 /* If there are more iv occurrences, we just give up (it is quite unlikely that
661 optimizing such a loop would help, and it would take ages). */
663 #define MAX_CONSIDERED_GROUPS \
664 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
666 /* If there are at most this number of ivs in the set, always try removing
667    unnecessary ivs from the set.  */
669 #define ALWAYS_PRUNE_CAND_SET_BOUND \
670 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
672 /* The list of trees for which the decl_rtl field must be reset is stored
673 here. */
675 static vec<tree> decl_rtl_to_reset;
677 static comp_cost force_expr_to_var_cost (tree, bool);
679 /* The single loop exit if it dominates the latch, NULL otherwise. */
681 edge
682 single_dom_exit (struct loop *loop)
684 edge exit = single_exit (loop);
686 if (!exit)
687 return NULL;
689 if (!just_once_each_iteration_p (loop, exit->src))
690 return NULL;
692 return exit;
695 /* Dumps information about the induction variable IV to FILE. Don't dump
696 variable's name if DUMP_NAME is FALSE. The information is dumped with
697 preceding spaces indicated by INDENT_LEVEL. */
699 void
700 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
702 const char *p;
703 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
705 if (indent_level > 4)
706 indent_level = 4;
707 p = spaces + 8 - (indent_level << 1);
709 fprintf (file, "%sIV struct:\n", p);
710 if (iv->ssa_name && dump_name)
712 fprintf (file, "%s SSA_NAME:\t", p);
713 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
714 fprintf (file, "\n");
717 fprintf (file, "%s Type:\t", p);
718 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
719 fprintf (file, "\n");
721 fprintf (file, "%s Base:\t", p);
722 print_generic_expr (file, iv->base, TDF_SLIM);
723 fprintf (file, "\n");
725 fprintf (file, "%s Step:\t", p);
726 print_generic_expr (file, iv->step, TDF_SLIM);
727 fprintf (file, "\n");
729 if (iv->base_object)
731 fprintf (file, "%s Object:\t", p);
732 print_generic_expr (file, iv->base_object, TDF_SLIM);
733 fprintf (file, "\n");
736 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
738 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
739 p, iv->no_overflow ? "No-overflow" : "Overflow");
742 /* Dumps information about the USE to FILE. */
744 void
745 dump_use (FILE *file, struct iv_use *use)
747 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
748 fprintf (file, " At stmt:\t");
749 print_gimple_stmt (file, use->stmt, 0);
750 fprintf (file, " At pos:\t");
751 if (use->op_p)
752 print_generic_expr (file, *use->op_p, TDF_SLIM);
753 fprintf (file, "\n");
754 dump_iv (file, use->iv, false, 2);
757 /* Dumps information about the uses to FILE. */
759 void
760 dump_groups (FILE *file, struct ivopts_data *data)
762 unsigned i, j;
763 struct iv_group *group;
765 for (i = 0; i < data->vgroups.length (); i++)
767 group = data->vgroups[i];
768 fprintf (file, "Group %d:\n", group->id);
769 if (group->type == USE_NONLINEAR_EXPR)
770 fprintf (file, " Type:\tGENERIC\n");
771 else if (group->type == USE_ADDRESS)
772 fprintf (file, " Type:\tADDRESS\n");
773 else
775 gcc_assert (group->type == USE_COMPARE);
776 fprintf (file, " Type:\tCOMPARE\n");
778 for (j = 0; j < group->vuses.length (); j++)
779 dump_use (file, group->vuses[j]);
783 /* Dumps information about induction variable candidate CAND to FILE. */
785 void
786 dump_cand (FILE *file, struct iv_cand *cand)
788 struct iv *iv = cand->iv;
790 fprintf (file, "Candidate %d:\n", cand->id);
791 if (cand->inv_vars)
793 fprintf (file, " Depend on inv.vars: ");
794 dump_bitmap (file, cand->inv_vars);
796 if (cand->inv_exprs)
798 fprintf (file, " Depend on inv.exprs: ");
799 dump_bitmap (file, cand->inv_exprs);
802 if (cand->var_before)
804       fprintf (file, "  Var before: ");
805 print_generic_expr (file, cand->var_before, TDF_SLIM);
806 fprintf (file, "\n");
808 if (cand->var_after)
810 fprintf (file, " Var after: ");
811 print_generic_expr (file, cand->var_after, TDF_SLIM);
812 fprintf (file, "\n");
815 switch (cand->pos)
817 case IP_NORMAL:
818 fprintf (file, " Incr POS: before exit test\n");
819 break;
821 case IP_BEFORE_USE:
822 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
823 break;
825 case IP_AFTER_USE:
826 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
827 break;
829 case IP_END:
830 fprintf (file, " Incr POS: at end\n");
831 break;
833 case IP_ORIGINAL:
834 fprintf (file, " Incr POS: orig biv\n");
835 break;
838 dump_iv (file, iv, false, 1);
841 /* Returns the info for ssa version VER. */
843 static inline struct version_info *
844 ver_info (struct ivopts_data *data, unsigned ver)
846 return data->version_info + ver;
849 /* Returns the info for ssa name NAME. */
851 static inline struct version_info *
852 name_info (struct ivopts_data *data, tree name)
854 return ver_info (data, SSA_NAME_VERSION (name));
857 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
858 emitted in LOOP. */
860 static bool
861 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
863 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
865 gcc_assert (bb);
867 if (sbb == loop->latch)
868 return true;
870 if (sbb != bb)
871 return false;
873 return stmt == last_stmt (bb);
876 /* Returns true if STMT is after the place where the original induction
877 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
878 if the positions are identical. */
880 static bool
881 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
883 basic_block cand_bb = gimple_bb (cand->incremented_at);
884 basic_block stmt_bb = gimple_bb (stmt);
886 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
887 return false;
889 if (stmt_bb != cand_bb)
890 return true;
892 if (true_if_equal
893 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
894 return true;
895 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
898 /* Returns true if STMT is after the place where the induction variable
899 CAND is incremented in LOOP. */
901 static bool
902 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
904 switch (cand->pos)
906 case IP_END:
907 return false;
909 case IP_NORMAL:
910 return stmt_after_ip_normal_pos (loop, stmt);
912 case IP_ORIGINAL:
913 case IP_AFTER_USE:
914 return stmt_after_inc_pos (cand, stmt, false);
916 case IP_BEFORE_USE:
917 return stmt_after_inc_pos (cand, stmt, true);
919 default:
920 gcc_unreachable ();
924 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
926 static bool
927 abnormal_ssa_name_p (tree exp)
929 if (!exp)
930 return false;
932 if (TREE_CODE (exp) != SSA_NAME)
933 return false;
935 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
938 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
939 abnormal phi node. Callback for for_each_index. */
941 static bool
942 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
943 void *data ATTRIBUTE_UNUSED)
945 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
947 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
948 return false;
949 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
950 return false;
953 return !abnormal_ssa_name_p (*index);
956 /* Returns true if EXPR contains a ssa name that occurs in an
957 abnormal phi node. */
959 bool
960 contains_abnormal_ssa_name_p (tree expr)
962 enum tree_code code;
963 enum tree_code_class codeclass;
965 if (!expr)
966 return false;
968 code = TREE_CODE (expr);
969 codeclass = TREE_CODE_CLASS (code);
971 if (code == SSA_NAME)
972 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
974 if (code == INTEGER_CST
975 || is_gimple_min_invariant (expr))
976 return false;
978 if (code == ADDR_EXPR)
979 return !for_each_index (&TREE_OPERAND (expr, 0),
980 idx_contains_abnormal_ssa_name_p,
981 NULL);
983 if (code == COND_EXPR)
984 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
985 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
986 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
988 switch (codeclass)
990 case tcc_binary:
991 case tcc_comparison:
992 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
993 return true;
995 /* Fallthru. */
996 case tcc_unary:
997 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
998 return true;
1000 break;
1002 default:
1003 gcc_unreachable ();
1006 return false;
1009 /* Returns the structure describing number of iterations determined from
1010 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1012 static struct tree_niter_desc *
1013 niter_for_exit (struct ivopts_data *data, edge exit)
1015 struct tree_niter_desc *desc;
1016 tree_niter_desc **slot;
1018 if (!data->niters)
1020 data->niters = new hash_map<edge, tree_niter_desc *>;
1021 slot = NULL;
1023 else
1024 slot = data->niters->get (exit);
1026 if (!slot)
1028 /* Try to determine number of iterations. We cannot safely work with ssa
1029 names that appear in phi nodes on abnormal edges, so that we do not
1030 create overlapping life ranges for them (PR 27283). */
1031 desc = XNEW (struct tree_niter_desc);
1032 if (!number_of_iterations_exit (data->current_loop,
1033 exit, desc, true)
1034 || contains_abnormal_ssa_name_p (desc->niter))
1036 XDELETE (desc);
1037 desc = NULL;
1039 data->niters->put (exit, desc);
1041 else
1042 desc = *slot;
1044 return desc;
1047 /* Returns the structure describing number of iterations determined from
1048 single dominating exit of DATA->current_loop, or NULL if something
1049 goes wrong. */
1051 static struct tree_niter_desc *
1052 niter_for_single_dom_exit (struct ivopts_data *data)
1054 edge exit = single_dom_exit (data->current_loop);
1056 if (!exit)
1057 return NULL;
1059 return niter_for_exit (data, exit);
1062 /* Initializes data structures used by the iv optimization pass, stored
1063 in DATA. */
1065 static void
1066 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1068 data->version_info_size = 2 * num_ssa_names;
1069 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1070 data->relevant = BITMAP_ALLOC (NULL);
1071 data->important_candidates = BITMAP_ALLOC (NULL);
1072 data->max_inv_var_id = 0;
1073 data->max_inv_expr_id = 0;
1074 data->niters = NULL;
1075 data->vgroups.create (20);
1076 data->vcands.create (20);
1077 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1078 data->name_expansion_cache = NULL;
1079 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1080 data->iv_common_cands.create (20);
1081 decl_rtl_to_reset.create (20);
1082 gcc_obstack_init (&data->iv_obstack);
1085 /* Returns a memory object to which EXPR points.  In case we are able to
1086 determine that it does not point to any such object, NULL is returned. */
1088 static tree
1089 determine_base_object (tree expr)
1091 enum tree_code code = TREE_CODE (expr);
1092 tree base, obj;
1094   /* If this is a pointer cast to any type, we need to determine
1095 the base object for the pointer; so handle conversions before
1096 throwing away non-pointer expressions. */
1097 if (CONVERT_EXPR_P (expr))
1098 return determine_base_object (TREE_OPERAND (expr, 0));
1100 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1101 return NULL_TREE;
1103 switch (code)
1105 case INTEGER_CST:
1106 return NULL_TREE;
1108 case ADDR_EXPR:
1109 obj = TREE_OPERAND (expr, 0);
1110 base = get_base_address (obj);
1112 if (!base)
1113 return expr;
1115 if (TREE_CODE (base) == MEM_REF)
1116 return determine_base_object (TREE_OPERAND (base, 0));
1118 return fold_convert (ptr_type_node,
1119 build_fold_addr_expr (base));
1121 case POINTER_PLUS_EXPR:
1122 return determine_base_object (TREE_OPERAND (expr, 0));
1124 case PLUS_EXPR:
1125 case MINUS_EXPR:
1126 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
1127 gcc_unreachable ();
1129 default:
1130 return fold_convert (ptr_type_node, expr);
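/* A few schematic examples: for &a[i], where a is a declared array, the
   base object is &a; for a POINTER_PLUS_EXPR p + off the function
   recurses into p and, via the default case, returns p converted to
   ptr_type_node; and for an integer constant, or any expression of
   non-pointer type, the result is NULL_TREE, i.e. no object.  */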
1134 /* Return true if an address expression with a non-DECL_P operand appears
1135 in EXPR. */
1137 static bool
1138 contain_complex_addr_expr (tree expr)
1140 bool res = false;
1142 STRIP_NOPS (expr);
1143 switch (TREE_CODE (expr))
1145 case POINTER_PLUS_EXPR:
1146 case PLUS_EXPR:
1147 case MINUS_EXPR:
1148 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1149 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1150 break;
1152 case ADDR_EXPR:
1153 return (!DECL_P (TREE_OPERAND (expr, 0)));
1155 default:
1156 return false;
1159 return res;
1162 /* Allocates an induction variable with given initial value BASE and step STEP
1163 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1165 static struct iv *
1166 alloc_iv (struct ivopts_data *data, tree base, tree step,
1167 bool no_overflow = false)
1169 tree expr = base;
1170 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1171 sizeof (struct iv));
1172 gcc_assert (step != NULL_TREE);
1174 /* Lower address expression in base except ones with DECL_P as operand.
1175 By doing this:
1176 1) More accurate cost can be computed for address expressions;
1177 2) Duplicate candidates won't be created for bases in different
1178 forms, like &a[0] and &a. */
1179 STRIP_NOPS (expr);
1180 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1181 || contain_complex_addr_expr (expr))
1183 aff_tree comb;
1184 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1185 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1188 iv->base = base;
1189 iv->base_object = determine_base_object (base);
1190 iv->step = step;
1191 iv->biv_p = false;
1192 iv->nonlin_use = NULL;
1193 iv->ssa_name = NULL_TREE;
1194 if (!no_overflow
1195 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1196 base, step))
1197 no_overflow = true;
1198 iv->no_overflow = no_overflow;
1199 iv->have_address_use = false;
1201 return iv;
1204 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1205 doesn't overflow. */
1207 static void
1208 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1209 bool no_overflow)
1211 struct version_info *info = name_info (data, iv);
1213 gcc_assert (!info->iv);
1215 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1216 info->iv = alloc_iv (data, base, step, no_overflow);
1217 info->iv->ssa_name = iv;
1220 /* Finds induction variable declaration for VAR. */
1222 static struct iv *
1223 get_iv (struct ivopts_data *data, tree var)
1225 basic_block bb;
1226 tree type = TREE_TYPE (var);
1228 if (!POINTER_TYPE_P (type)
1229 && !INTEGRAL_TYPE_P (type))
1230 return NULL;
1232 if (!name_info (data, var)->iv)
1234 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1236 if (!bb
1237 || !flow_bb_inside_loop_p (data->current_loop, bb))
1238 set_iv (data, var, var, build_int_cst (type, 0), true);
1241 return name_info (data, var)->iv;
1244 /* Return the first non-invariant ssa var found in EXPR. */
1246 static tree
1247 extract_single_var_from_expr (tree expr)
1249 int i, n;
1250 tree tmp;
1251 enum tree_code code;
1253 if (!expr || is_gimple_min_invariant (expr))
1254 return NULL;
1256 code = TREE_CODE (expr);
1257 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1259 n = TREE_OPERAND_LENGTH (expr);
1260 for (i = 0; i < n; i++)
1262 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1264 if (tmp)
1265 return tmp;
1268 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1271 /* Finds basic ivs. */
1273 static bool
1274 find_bivs (struct ivopts_data *data)
1276 gphi *phi;
1277 affine_iv iv;
1278 tree step, type, base, stop;
1279 bool found = false;
1280 struct loop *loop = data->current_loop;
1281 gphi_iterator psi;
1283 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1285 phi = psi.phi ();
1287 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1288 continue;
1290 if (virtual_operand_p (PHI_RESULT (phi)))
1291 continue;
1293 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1294 continue;
1296 if (integer_zerop (iv.step))
1297 continue;
1299 step = iv.step;
1300 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1301       /* Stop expanding the iv base at the first ssa var referred to by the iv
1302 	 step.  Ideally we should stop at every such ssa var, but since that is
1303 	 expensive and rarely needed, we just do it on the first one.
1305 	 See PR64705 for the rationale.  */
1306 stop = extract_single_var_from_expr (step);
1307 base = expand_simple_operations (base, stop);
1308 if (contains_abnormal_ssa_name_p (base)
1309 || contains_abnormal_ssa_name_p (step))
1310 continue;
1312 type = TREE_TYPE (PHI_RESULT (phi));
1313 base = fold_convert (type, base);
1314 if (step)
1316 if (POINTER_TYPE_P (type))
1317 step = convert_to_ptrofftype (step);
1318 else
1319 step = fold_convert (type, step);
1322 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1323 found = true;
1326 return found;
1329 /* Marks basic ivs. */
1331 static void
1332 mark_bivs (struct ivopts_data *data)
1334 gphi *phi;
1335 gimple *def;
1336 tree var;
1337 struct iv *iv, *incr_iv;
1338 struct loop *loop = data->current_loop;
1339 basic_block incr_bb;
1340 gphi_iterator psi;
1342 data->bivs_not_used_in_addr = 0;
1343 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1345 phi = psi.phi ();
1347 iv = get_iv (data, PHI_RESULT (phi));
1348 if (!iv)
1349 continue;
1351 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1352 def = SSA_NAME_DEF_STMT (var);
1353 /* Don't mark iv peeled from other one as biv. */
1354 if (def
1355 && gimple_code (def) == GIMPLE_PHI
1356 && gimple_bb (def) == loop->header)
1357 continue;
1359 incr_iv = get_iv (data, var);
1360 if (!incr_iv)
1361 continue;
1363 /* If the increment is in the subloop, ignore it. */
1364 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1365 if (incr_bb->loop_father != data->current_loop
1366 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1367 continue;
1369 iv->biv_p = true;
1370 incr_iv->biv_p = true;
1371 if (iv->no_overflow)
1372 data->bivs_not_used_in_addr++;
1373 if (incr_iv->no_overflow)
1374 data->bivs_not_used_in_addr++;
1378 /* Checks whether STMT defines a linear induction variable and stores its
1379 parameters to IV. */
1381 static bool
1382 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1384 tree lhs, stop;
1385 struct loop *loop = data->current_loop;
1387 iv->base = NULL_TREE;
1388 iv->step = NULL_TREE;
1390 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1391 return false;
1393 lhs = gimple_assign_lhs (stmt);
1394 if (TREE_CODE (lhs) != SSA_NAME)
1395 return false;
1397 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1398 return false;
1400   /* Stop expanding the iv base at the first ssa var referred to by the iv
1401      step.  Ideally we should stop at every such ssa var, but since that is
1402      expensive and rarely needed, we just do it on the first one.
1404      See PR64705 for the rationale.  */
1405 stop = extract_single_var_from_expr (iv->step);
1406 iv->base = expand_simple_operations (iv->base, stop);
1407 if (contains_abnormal_ssa_name_p (iv->base)
1408 || contains_abnormal_ssa_name_p (iv->step))
1409 return false;
1411 /* If STMT could throw, then do not consider STMT as defining a GIV.
1412      While this will suppress optimizations, we cannot safely delete this
1413 GIV and associated statements, even if it appears it is not used. */
1414 if (stmt_could_throw_p (stmt))
1415 return false;
1417 return true;
1420 /* Finds general ivs in statement STMT. */
1422 static void
1423 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1425 affine_iv iv;
1427 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1428 return;
1430 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1433 /* Finds general ivs in basic block BB. */
1435 static void
1436 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1438 gimple_stmt_iterator bsi;
1440 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1441 find_givs_in_stmt (data, gsi_stmt (bsi));
1444 /* Finds general ivs. */
1446 static void
1447 find_givs (struct ivopts_data *data)
1449 struct loop *loop = data->current_loop;
1450 basic_block *body = get_loop_body_in_dom_order (loop);
1451 unsigned i;
1453 for (i = 0; i < loop->num_nodes; i++)
1454 find_givs_in_bb (data, body[i]);
1455 free (body);
1458 /* For each ssa name defined in LOOP determines whether it is an induction
1459 variable and if so, its initial value and step. */
1461 static bool
1462 find_induction_variables (struct ivopts_data *data)
1464 unsigned i;
1465 bitmap_iterator bi;
1467 if (!find_bivs (data))
1468 return false;
1470 find_givs (data);
1471 mark_bivs (data);
1473 if (dump_file && (dump_flags & TDF_DETAILS))
1475 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1477 if (niter)
1479 fprintf (dump_file, " number of iterations ");
1480 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1481 if (!integer_zerop (niter->may_be_zero))
1483 fprintf (dump_file, "; zero if ");
1484 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1486 fprintf (dump_file, "\n");
1489 fprintf (dump_file, "\n<Induction Vars>:\n");
1490 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1492 struct version_info *info = ver_info (data, i);
1493 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1494 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1498 return true;
1501 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1502 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1503    is the const offset stripped from IV base; for uses of other types, both
1504 are zero by default. */
1506 static struct iv_use *
1507 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1508 gimple *stmt, enum use_type type, tree addr_base,
1509 unsigned HOST_WIDE_INT addr_offset)
1511 struct iv_use *use = XCNEW (struct iv_use);
1513 use->id = group->vuses.length ();
1514 use->group_id = group->id;
1515 use->type = type;
1516 use->iv = iv;
1517 use->stmt = stmt;
1518 use->op_p = use_p;
1519 use->addr_base = addr_base;
1520 use->addr_offset = addr_offset;
1522 group->vuses.safe_push (use);
1523 return use;
1526 /* Checks whether OP is a loop-level invariant and if so, records it.
1527 NONLINEAR_USE is true if the invariant is used in a way we do not
1528 handle specially. */
1530 static void
1531 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1533 basic_block bb;
1534 struct version_info *info;
1536 if (TREE_CODE (op) != SSA_NAME
1537 || virtual_operand_p (op))
1538 return;
1540 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1541 if (bb
1542 && flow_bb_inside_loop_p (data->current_loop, bb))
1543 return;
1545 info = name_info (data, op);
1546 info->name = op;
1547 info->has_nonlin_use |= nonlinear_use;
1548 if (!info->inv_id)
1549 info->inv_id = ++data->max_inv_var_id;
1550 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1553 static tree
1554 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1556 /* Record a group of TYPE. */
1558 static struct iv_group *
1559 record_group (struct ivopts_data *data, enum use_type type)
1561 struct iv_group *group = XCNEW (struct iv_group);
1563 group->id = data->vgroups.length ();
1564 group->type = type;
1565 group->related_cands = BITMAP_ALLOC (NULL);
1566 group->vuses.create (1);
1568 data->vgroups.safe_push (group);
1569 return group;
1572 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1573 New group will be created if there is no existing group for the use. */
1575 static struct iv_use *
1576 record_group_use (struct ivopts_data *data, tree *use_p,
1577 struct iv *iv, gimple *stmt, enum use_type type)
1579 tree addr_base = NULL;
1580 struct iv_group *group = NULL;
1581 unsigned HOST_WIDE_INT addr_offset = 0;
1583 /* Record non address type use in a new group. */
1584 if (type == USE_ADDRESS && iv->base_object)
1586 unsigned int i;
1588 addr_base = strip_offset (iv->base, &addr_offset);
1589 for (i = 0; i < data->vgroups.length (); i++)
1591 struct iv_use *use;
1593 group = data->vgroups[i];
1594 use = group->vuses[0];
1595 if (use->type != USE_ADDRESS || !use->iv->base_object)
1596 continue;
1598 /* Check if it has the same stripped base and step. */
1599 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1600 && operand_equal_p (iv->step, use->iv->step, 0)
1601 && operand_equal_p (addr_base, use->addr_base, 0))
1602 break;
1604 if (i == data->vgroups.length ())
1605 group = NULL;
1608 if (!group)
1609 group = record_group (data, type);
1611 return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
1614 /* Checks whether the use OP is interesting and if so, records it. */
1616 static struct iv_use *
1617 find_interesting_uses_op (struct ivopts_data *data, tree op)
1619 struct iv *iv;
1620 gimple *stmt;
1621 struct iv_use *use;
1623 if (TREE_CODE (op) != SSA_NAME)
1624 return NULL;
1626 iv = get_iv (data, op);
1627 if (!iv)
1628 return NULL;
1630 if (iv->nonlin_use)
1632 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1633 return iv->nonlin_use;
1636 if (integer_zerop (iv->step))
1638 record_invariant (data, op, true);
1639 return NULL;
1642 stmt = SSA_NAME_DEF_STMT (op);
1643 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1645 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1646 iv->nonlin_use = use;
1647 return use;
1650 /* Indicate how compare type iv_use can be handled. */
1651 enum comp_iv_rewrite
1653 COMP_IV_NA,
1654 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1655 COMP_IV_EXPR,
1656 /* We may rewrite compare type iv_uses on both sides of comparison by
1657 expressing value of each iv_use. */
1658 COMP_IV_EXPR_2,
1659 /* We may rewrite compare type iv_use by expressing value of the iv_use
1660 or by eliminating it with other iv_cand. */
1661 COMP_IV_ELIM
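/* Schematic examples: in "if (i < n)" with i an iv and n loop invariant,
   the compare use may be eliminated by choosing a suitable candidate
   (COMP_IV_ELIM); in "if (i < j)" with both i and j ivs, both sides may
   be rewritten (COMP_IV_EXPR_2); and in "if (i < x)" where x is defined
   inside the loop but is not an iv, only the iv side may be rewritten
   (COMP_IV_EXPR).  */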
1664 /* Given a condition in statement STMT, checks whether it is a compare
1665    of an induction variable and an invariant.  If this is the case,
1666    CONTROL_VAR is set to the location of the iv, BOUND to the location of
1667    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1668    induction variable descriptions, and a comp_iv_rewrite value saying how
1669    the compare can be handled is returned.  Otherwise CONTROL_VAR and BOUND
1670    are set to the arguments of the condition and COMP_IV_NA is returned.  */
1672 static enum comp_iv_rewrite
1673 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1674 tree **control_var, tree **bound,
1675 struct iv **iv_var, struct iv **iv_bound)
1677 /* The objects returned when COND has constant operands. */
1678 static struct iv const_iv;
1679 static tree zero;
1680 tree *op0 = &zero, *op1 = &zero;
1681 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1682 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1684 if (gimple_code (stmt) == GIMPLE_COND)
1686 gcond *cond_stmt = as_a <gcond *> (stmt);
1687 op0 = gimple_cond_lhs_ptr (cond_stmt);
1688 op1 = gimple_cond_rhs_ptr (cond_stmt);
1690 else
1692 op0 = gimple_assign_rhs1_ptr (stmt);
1693 op1 = gimple_assign_rhs2_ptr (stmt);
1696 zero = integer_zero_node;
1697 const_iv.step = integer_zero_node;
1699 if (TREE_CODE (*op0) == SSA_NAME)
1700 iv0 = get_iv (data, *op0);
1701 if (TREE_CODE (*op1) == SSA_NAME)
1702 iv1 = get_iv (data, *op1);
1704   /* If both sides of the comparison are IVs, we can express ivs on both ends.  */
1705 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1707 rewrite_type = COMP_IV_EXPR_2;
1708 goto end;
1711   /* If neither side of the comparison is an IV.  */
1712 if ((!iv0 || integer_zerop (iv0->step))
1713 && (!iv1 || integer_zerop (iv1->step)))
1714 goto end;
1716 /* Control variable may be on the other side. */
1717 if (!iv0 || integer_zerop (iv0->step))
1719 std::swap (op0, op1);
1720 std::swap (iv0, iv1);
1722 /* If one side is IV and the other side isn't loop invariant. */
1723 if (!iv1)
1724 rewrite_type = COMP_IV_EXPR;
1725 /* If one side is IV and the other side is loop invariant. */
1726 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1727 rewrite_type = COMP_IV_ELIM;
1729 end:
1730 if (control_var)
1731 *control_var = op0;
1732 if (iv_var)
1733 *iv_var = iv0;
1734 if (bound)
1735 *bound = op1;
1736 if (iv_bound)
1737 *iv_bound = iv1;
1739 return rewrite_type;
1742 /* Checks whether the condition in STMT is interesting and if so,
1743 records it. */
1745 static void
1746 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1748 tree *var_p, *bound_p;
1749 struct iv *var_iv, *bound_iv;
1750 enum comp_iv_rewrite ret;
1752 ret = extract_cond_operands (data, stmt,
1753 &var_p, &bound_p, &var_iv, &bound_iv);
1754 if (ret == COMP_IV_NA)
1756 find_interesting_uses_op (data, *var_p);
1757 find_interesting_uses_op (data, *bound_p);
1758 return;
1761 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE);
1762 /* Record compare type iv_use for iv on the other side of comparison. */
1763 if (ret == COMP_IV_EXPR_2)
1764 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE);
1767 /* Returns the outermost loop in which EXPR is obviously invariant,
1768    relative to the loop LOOP, i.e. all its operands are defined
1769    outside of the returned loop.  Returns NULL if EXPR is not
1770    even obviously invariant in LOOP.  */
1772 struct loop *
1773 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1775 basic_block def_bb;
1776 unsigned i, len;
1778 if (is_gimple_min_invariant (expr))
1779 return current_loops->tree_root;
1781 if (TREE_CODE (expr) == SSA_NAME)
1783 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1784 if (def_bb)
1786 if (flow_bb_inside_loop_p (loop, def_bb))
1787 return NULL;
1788 return superloop_at_depth (loop,
1789 loop_depth (def_bb->loop_father) + 1);
1792 return current_loops->tree_root;
1795 if (!EXPR_P (expr))
1796 return NULL;
1798 unsigned maxdepth = 0;
1799 len = TREE_OPERAND_LENGTH (expr);
1800 for (i = 0; i < len; i++)
1802 struct loop *ivloop;
1803 if (!TREE_OPERAND (expr, i))
1804 continue;
1806 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1807 if (!ivloop)
1808 return NULL;
1809 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1812 return superloop_at_depth (loop, maxdepth);
1815 /* Returns true if expression EXPR is obviously invariant in LOOP,
1816 i.e. if all its operands are defined outside of the LOOP. LOOP
1817 should not be the function body. */
1819 bool
1820 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1822 basic_block def_bb;
1823 unsigned i, len;
1825 gcc_assert (loop_depth (loop) > 0);
1827 if (is_gimple_min_invariant (expr))
1828 return true;
1830 if (TREE_CODE (expr) == SSA_NAME)
1832 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1833 if (def_bb
1834 && flow_bb_inside_loop_p (loop, def_bb))
1835 return false;
1837 return true;
1840 if (!EXPR_P (expr))
1841 return false;
1843 len = TREE_OPERAND_LENGTH (expr);
1844 for (i = 0; i < len; i++)
1845 if (TREE_OPERAND (expr, i)
1846 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1847 return false;
1849 return true;
1852 /* Given expression EXPR which computes inductive values with respect
1853 to loop recorded in DATA, this function returns biv from which EXPR
1854 is derived by tracing definition chains of ssa variables in EXPR. */
1856 static struct iv*
1857 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1859 struct iv *iv;
1860 unsigned i, n;
1861 tree e2, e1;
1862 enum tree_code code;
1863 gimple *stmt;
1865 if (expr == NULL_TREE)
1866 return NULL;
1868 if (is_gimple_min_invariant (expr))
1869 return NULL;
1871 code = TREE_CODE (expr);
1872 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1874 n = TREE_OPERAND_LENGTH (expr);
1875 for (i = 0; i < n; i++)
1877 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1878 if (iv)
1879 return iv;
1883 /* Stop if it's not ssa name. */
1884 if (code != SSA_NAME)
1885 return NULL;
1887 iv = get_iv (data, expr);
1888 if (!iv || integer_zerop (iv->step))
1889 return NULL;
1890 else if (iv->biv_p)
1891 return iv;
1893 stmt = SSA_NAME_DEF_STMT (expr);
1894 if (gphi *phi = dyn_cast <gphi *> (stmt))
1896 ssa_op_iter iter;
1897 use_operand_p use_p;
1898 basic_block phi_bb = gimple_bb (phi);
1900 /* Skip loop header PHI that doesn't define biv. */
1901 if (phi_bb->loop_father == data->current_loop)
1902 return NULL;
1904 if (virtual_operand_p (gimple_phi_result (phi)))
1905 return NULL;
1907 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1909 tree use = USE_FROM_PTR (use_p);
1910 iv = find_deriving_biv_for_expr (data, use);
1911 if (iv)
1912 return iv;
1914 return NULL;
1916 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1917 return NULL;
1919 e1 = gimple_assign_rhs1 (stmt);
1920 code = gimple_assign_rhs_code (stmt);
1921 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1922 return find_deriving_biv_for_expr (data, e1);
1924 switch (code)
1926 case MULT_EXPR:
1927 case PLUS_EXPR:
1928 case MINUS_EXPR:
1929 case POINTER_PLUS_EXPR:
1930 /* Increments, decrements and multiplications by a constant
1931 are simple. */
1932 e2 = gimple_assign_rhs2 (stmt);
1933 iv = find_deriving_biv_for_expr (data, e2);
1934 if (iv)
1935 return iv;
1936 gcc_fallthrough ();
1938 CASE_CONVERT:
1939 /* Casts are simple. */
1940 return find_deriving_biv_for_expr (data, e1);
1942 default:
1943 break;
1946 return NULL;
1949 /* Record that BIV and its predecessor and successor ivs are used in
1950    address type uses.  */
1952 static void
1953 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1955 unsigned i;
1956 tree type, base_1, base_2;
1957 bitmap_iterator bi;
1959 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1960 || biv->have_address_use || !biv->no_overflow)
1961 return;
1963 type = TREE_TYPE (biv->base);
1964 if (!INTEGRAL_TYPE_P (type))
1965 return;
1967 biv->have_address_use = true;
1968 data->bivs_not_used_in_addr--;
1969 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1970 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1972 struct iv *iv = ver_info (data, i)->iv;
1974 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1975 || iv->have_address_use || !iv->no_overflow)
1976 continue;
1978 if (type != TREE_TYPE (iv->base)
1979 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1980 continue;
1982 if (!operand_equal_p (biv->step, iv->step, 0))
1983 continue;
1985 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1986 if (operand_equal_p (base_1, iv->base, 0)
1987 || operand_equal_p (base_2, biv->base, 0))
1989 iv->have_address_use = true;
1990 data->bivs_not_used_in_addr--;
1995 /* Accumulates the steps of indices into DATA and replaces their values with the
1996 initial ones. Returns false when the value of the index cannot be determined.
1997 Callback for for_each_index. */
1999 struct ifs_ivopts_data
2001 struct ivopts_data *ivopts_data;
2002 gimple *stmt;
2003 tree step;
2006 static bool
2007 idx_find_step (tree base, tree *idx, void *data)
2009 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2010 struct iv *iv;
2011 bool use_overflow_semantics = false;
2012 tree step, iv_base, iv_step, lbound, off;
2013 struct loop *loop = dta->ivopts_data->current_loop;
2015 /* If base is a component ref, require that the offset of the reference
2016 be invariant. */
2017 if (TREE_CODE (base) == COMPONENT_REF)
2019 off = component_ref_field_offset (base);
2020 return expr_invariant_in_loop_p (loop, off);
2023 /* If base is array, first check whether we will be able to move the
2024 reference out of the loop (in order to take its address in strength
2025 reduction). In order for this to work we need both lower bound
2026 and step to be loop invariants. */
2027 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2029 /* Moreover, for a range, the size needs to be invariant as well. */
2030 if (TREE_CODE (base) == ARRAY_RANGE_REF
2031 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2032 return false;
2034 step = array_ref_element_size (base);
2035 lbound = array_ref_low_bound (base);
2037 if (!expr_invariant_in_loop_p (loop, step)
2038 || !expr_invariant_in_loop_p (loop, lbound))
2039 return false;
2042 if (TREE_CODE (*idx) != SSA_NAME)
2043 return true;
2045 iv = get_iv (dta->ivopts_data, *idx);
2046 if (!iv)
2047 return false;
2049 /* XXX We produce for a base of *D42 with iv->base being &x[0]
2050 *&x[0], which is not folded and does not trigger the
2051 ARRAY_REF path below. */
2052 *idx = iv->base;
2054 if (integer_zerop (iv->step))
2055 return true;
2057 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2059 step = array_ref_element_size (base);
2061 /* We only handle addresses whose step is an integer constant. */
2062 if (TREE_CODE (step) != INTEGER_CST)
2063 return false;
2065 else
2066     /* The step for pointer arithmetic is already 1 byte.  */
2067 step = size_one_node;
2069 iv_base = iv->base;
2070 iv_step = iv->step;
2071 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2072 use_overflow_semantics = true;
2074 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2075 sizetype, &iv_base, &iv_step, dta->stmt,
2076 use_overflow_semantics))
2078 /* The index might wrap. */
2079 return false;
2082 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2083 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2085 if (dta->ivopts_data->bivs_not_used_in_addr)
2087 if (!iv->biv_p)
2088 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2090 record_biv_for_address_use (dta->ivopts_data, iv);
2092 return true;
2095 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2096 object is passed to it in DATA. */
2098 static bool
2099 idx_record_use (tree base, tree *idx,
2100 void *vdata)
2102 struct ivopts_data *data = (struct ivopts_data *) vdata;
2103 find_interesting_uses_op (data, *idx);
2104 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2106 find_interesting_uses_op (data, array_ref_element_size (base));
2107 find_interesting_uses_op (data, array_ref_low_bound (base));
2109 return true;
2112 /* If we can prove that TOP = cst * BOT for some constant cst,
2113 store cst to MUL and return true. Otherwise return false.
2114 The returned value is always sign-extended, regardless of the
2115 signedness of TOP and BOT. */
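/* For instance, for TOP = x_1 * 4 and BOT = x_1 this stores 4 to MUL;
   for TOP = 12 and BOT = 4 it stores 3; for TOP = x_1 + 1 and BOT = x_1
   it fails, since 1 is not a constant multiple of x_1. */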
2117 static bool
2118 constant_multiple_of (tree top, tree bot, widest_int *mul)
2120 tree mby;
2121 enum tree_code code;
2122 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2123 widest_int res, p0, p1;
2125 STRIP_NOPS (top);
2126 STRIP_NOPS (bot);
2128 if (operand_equal_p (top, bot, 0))
2130 *mul = 1;
2131 return true;
2134 code = TREE_CODE (top);
2135 switch (code)
2137 case MULT_EXPR:
2138 mby = TREE_OPERAND (top, 1);
2139 if (TREE_CODE (mby) != INTEGER_CST)
2140 return false;
2142 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2143 return false;
2145 *mul = wi::sext (res * wi::to_widest (mby), precision);
2146 return true;
2148 case PLUS_EXPR:
2149 case MINUS_EXPR:
2150 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2151 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2152 return false;
2154 if (code == MINUS_EXPR)
2155 p1 = -p1;
2156 *mul = wi::sext (p0 + p1, precision);
2157 return true;
2159 case INTEGER_CST:
2160 if (TREE_CODE (bot) != INTEGER_CST)
2161 return false;
2163 p0 = widest_int::from (top, SIGNED);
2164 p1 = widest_int::from (bot, SIGNED);
2165 if (p1 == 0)
2166 return false;
2167 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2168 return res == 0;
2170 default:
2171 return false;
2175 /* Return true if memory reference REF with step STEP may be unaligned. */
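/* For instance, a reference whose type wants 4-byte alignment but whose
   step has only one trailing zero bit (a step of 2 bytes) may land on a
   2-byte boundary in some iteration, so we conservatively answer true. */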
2177 static bool
2178 may_be_unaligned_p (tree ref, tree step)
2180 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2181 thus they are not misaligned. */
2182 if (TREE_CODE (ref) == TARGET_MEM_REF)
2183 return false;
2185 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2186 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2187 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2189 unsigned HOST_WIDE_INT bitpos;
2190 unsigned int ref_align;
2191 get_object_alignment_1 (ref, &ref_align, &bitpos);
2192 if (ref_align < align
2193 || (bitpos % align) != 0
2194 || (bitpos % BITS_PER_UNIT) != 0)
2195 return true;
2197 unsigned int trailing_zeros = tree_ctz (step);
2198 if (trailing_zeros < HOST_BITS_PER_INT
2199 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2200 return true;
2202 return false;
2205 /* Return true if EXPR may be non-addressable. */
2207 bool
2208 may_be_nonaddressable_p (tree expr)
2210 switch (TREE_CODE (expr))
2212 case TARGET_MEM_REF:
2213 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2214 target, thus they are always addressable. */
2215 return false;
2217 case MEM_REF:
2218 /* Likewise for MEM_REFs, modulo the storage order. */
2219 return REF_REVERSE_STORAGE_ORDER (expr);
2221 case BIT_FIELD_REF:
2222 if (REF_REVERSE_STORAGE_ORDER (expr))
2223 return true;
2224 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2226 case COMPONENT_REF:
2227 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2228 return true;
2229 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2230 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2232 case ARRAY_REF:
2233 case ARRAY_RANGE_REF:
2234 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2235 return true;
2236 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2238 case VIEW_CONVERT_EXPR:
2239 /* This kind of view-conversion may wrap non-addressable objects
2240 and make them look addressable. After some processing the
2241 non-addressability may be uncovered again, causing ADDR_EXPRs
2242 of inappropriate objects to be built. */
2243 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2244 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2245 return true;
2246 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2248 CASE_CONVERT:
2249 return true;
2251 default:
2252 break;
2255 return false;
2258 /* Finds addresses in *OP_P inside STMT. */
2260 static void
2261 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2262 tree *op_p)
2264 tree base = *op_p, step = size_zero_node;
2265 struct iv *civ;
2266 struct ifs_ivopts_data ifs_ivopts_data;
2268 /* Do not play with volatile memory references. A bit too conservative,
2269 perhaps, but safe. */
2270 if (gimple_has_volatile_ops (stmt))
2271 goto fail;
2273 /* Ignore bitfields for now. Not really something terribly complicated
2274 to handle. TODO. */
2275 if (TREE_CODE (base) == BIT_FIELD_REF)
2276 goto fail;
2278 base = unshare_expr (base);
2280 if (TREE_CODE (base) == TARGET_MEM_REF)
2282 tree type = build_pointer_type (TREE_TYPE (base));
2283 tree astep;
2285 if (TMR_BASE (base)
2286 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2288 civ = get_iv (data, TMR_BASE (base));
2289 if (!civ)
2290 goto fail;
2292 TMR_BASE (base) = civ->base;
2293 step = civ->step;
2295 if (TMR_INDEX2 (base)
2296 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2298 civ = get_iv (data, TMR_INDEX2 (base));
2299 if (!civ)
2300 goto fail;
2302 TMR_INDEX2 (base) = civ->base;
2303 step = civ->step;
2305 if (TMR_INDEX (base)
2306 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2308 civ = get_iv (data, TMR_INDEX (base));
2309 if (!civ)
2310 goto fail;
2312 TMR_INDEX (base) = civ->base;
2313 astep = civ->step;
2315 if (astep)
2317 if (TMR_STEP (base))
2318 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2320 step = fold_build2 (PLUS_EXPR, type, step, astep);
2324 if (integer_zerop (step))
2325 goto fail;
2326 base = tree_mem_ref_addr (type, base);
2328 else
2330 ifs_ivopts_data.ivopts_data = data;
2331 ifs_ivopts_data.stmt = stmt;
2332 ifs_ivopts_data.step = size_zero_node;
2333 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2334 || integer_zerop (ifs_ivopts_data.step))
2335 goto fail;
2336 step = ifs_ivopts_data.step;
2338 /* Check that the base expression is addressable. This needs
2339 to be done after substituting bases of IVs into it. */
2340 if (may_be_nonaddressable_p (base))
2341 goto fail;
2343 /* Moreover, on strict alignment platforms, check that it is
2344 sufficiently aligned. */
2345 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2346 goto fail;
2348 base = build_fold_addr_expr (base);
2350 /* Substituting bases of IVs into the base expression might
2351 have caused folding opportunities. */
2352 if (TREE_CODE (base) == ADDR_EXPR)
2354 tree *ref = &TREE_OPERAND (base, 0);
2355 while (handled_component_p (*ref))
2356 ref = &TREE_OPERAND (*ref, 0);
2357 if (TREE_CODE (*ref) == MEM_REF)
2359 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2360 TREE_OPERAND (*ref, 0),
2361 TREE_OPERAND (*ref, 1));
2362 if (tem)
2363 *ref = tem;
2368 civ = alloc_iv (data, base, step);
2369 /* Fail if base object of this memory reference is unknown. */
2370 if (civ->base_object == NULL_TREE)
2371 goto fail;
2373 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2374 return;
2376 fail:
2377 for_each_index (op_p, idx_record_use, data);
2380 /* Finds and records invariants used in STMT. */
2382 static void
2383 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2385 ssa_op_iter iter;
2386 use_operand_p use_p;
2387 tree op;
2389 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2391 op = USE_FROM_PTR (use_p);
2392 record_invariant (data, op, false);
2396 /* Finds interesting uses of induction variables in the statement STMT. */
2398 static void
2399 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2401 struct iv *iv;
2402 tree op, *lhs, *rhs;
2403 ssa_op_iter iter;
2404 use_operand_p use_p;
2405 enum tree_code code;
2407 find_invariants_stmt (data, stmt);
2409 if (gimple_code (stmt) == GIMPLE_COND)
2411 find_interesting_uses_cond (data, stmt);
2412 return;
2415 if (is_gimple_assign (stmt))
2417 lhs = gimple_assign_lhs_ptr (stmt);
2418 rhs = gimple_assign_rhs1_ptr (stmt);
2420 if (TREE_CODE (*lhs) == SSA_NAME)
2422 /* If the statement defines an induction variable, the uses are not
2423 interesting by themselves. */
2425 iv = get_iv (data, *lhs);
2427 if (iv && !integer_zerop (iv->step))
2428 return;
2431 code = gimple_assign_rhs_code (stmt);
2432 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2433 && (REFERENCE_CLASS_P (*rhs)
2434 || is_gimple_val (*rhs)))
2436 if (REFERENCE_CLASS_P (*rhs))
2437 find_interesting_uses_address (data, stmt, rhs);
2438 else
2439 find_interesting_uses_op (data, *rhs);
2441 if (REFERENCE_CLASS_P (*lhs))
2442 find_interesting_uses_address (data, stmt, lhs);
2443 return;
2445 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2447 find_interesting_uses_cond (data, stmt);
2448 return;
2451 /* TODO -- we should also handle address uses of type
2453 memory = call (whatever);
2457 call (memory). */
2460 if (gimple_code (stmt) == GIMPLE_PHI
2461 && gimple_bb (stmt) == data->current_loop->header)
2463 iv = get_iv (data, PHI_RESULT (stmt));
2465 if (iv && !integer_zerop (iv->step))
2466 return;
2469 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2471 op = USE_FROM_PTR (use_p);
2473 if (TREE_CODE (op) != SSA_NAME)
2474 continue;
2476 iv = get_iv (data, op);
2477 if (!iv)
2478 continue;
2480 find_interesting_uses_op (data, op);
2484 /* Finds interesting uses of induction variables outside of loops
2485 on loop exit edge EXIT. */
2487 static void
2488 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2490 gphi *phi;
2491 gphi_iterator psi;
2492 tree def;
2494 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2496 phi = psi.phi ();
2497 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2498 if (!virtual_operand_p (def))
2499 find_interesting_uses_op (data, def);
2503 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2504 mode for memory reference represented by USE. */
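/* We build a (plus (reg) (const_int OFFSET)) address in the address mode
   of the use's address space and ask memory_address_addr_space_p whether
   it is a legitimate address for the memory mode; the scratch RTL is
   cached in ADDR_LIST per address space and memory mode. */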
2506 static GTY (()) vec<rtx, va_gc> *addr_list;
2508 static bool
2509 addr_offset_valid_p (struct iv_use *use, HOST_WIDE_INT offset)
2511 rtx reg, addr;
2512 unsigned list_index;
2513 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2514 machine_mode addr_mode, mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2516 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2517 if (list_index >= vec_safe_length (addr_list))
2518 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2520 addr = (*addr_list)[list_index];
2521 if (!addr)
2523 addr_mode = targetm.addr_space.address_mode (as);
2524 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2525 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2526 (*addr_list)[list_index] = addr;
2528 else
2529 addr_mode = GET_MODE (addr);
2531 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2532 return (memory_address_addr_space_p (mem_mode, addr, as));
2535 /* Comparison function to sort group in ascending order of addr_offset. */
2537 static int
2538 group_compare_offset (const void *a, const void *b)
2540 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2541 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2543 if ((*u1)->addr_offset != (*u2)->addr_offset)
2544 return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2545 else
2546 return 0;
2549 /* Check if small groups should be split. Return true if no group
2550 contains more than two uses with distinct addr_offsets. Return
2551 false otherwise. We want to split such groups because:
2553 1) Small groups don't have much benefit and may interfere with
2554 general candidate selection.
2555 2) The problem size with only small groups is usually small and the
2556 general algorithm can handle it well.
2558 TODO -- Above claim may not hold when we want to merge memory
2559 accesses with consecutive addresses. */
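/* For example, if every address group references at most two distinct
   offsets, say {base, base + 4}, all groups are split so that only uses
   with the same offset stay together; once some group touches three or
   more distinct offsets, groups are only split where the offset does not
   fit the [base + offset] addressing mode. */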
2561 static bool
2562 split_small_address_groups_p (struct ivopts_data *data)
2564 unsigned int i, j, distinct = 1;
2565 struct iv_use *pre;
2566 struct iv_group *group;
2568 for (i = 0; i < data->vgroups.length (); i++)
2570 group = data->vgroups[i];
2571 if (group->vuses.length () == 1)
2572 continue;
2574 gcc_assert (group->type == USE_ADDRESS);
2575 if (group->vuses.length () == 2)
2577 if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2578 std::swap (group->vuses[0], group->vuses[1]);
2580 else
2581 group->vuses.qsort (group_compare_offset);
2583 if (distinct > 2)
2584 continue;
2586 distinct = 1;
2587 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2589 if (group->vuses[j]->addr_offset != pre->addr_offset)
2591 pre = group->vuses[j];
2592 distinct++;
2595 if (distinct > 2)
2596 break;
2600 return (distinct <= 2);
2603 /* For each group of address type uses, this function further groups
2604 these uses according to the maximum offset supported by target's
2605 [base + offset] addressing mode. */
2607 static void
2608 split_address_groups (struct ivopts_data *data)
2610 unsigned int i, j;
2611 /* Whether to split groups unconditionally. */
2612 bool split_p = split_small_address_groups_p (data);
2614 for (i = 0; i < data->vgroups.length (); i++)
2616 struct iv_group *new_group = NULL;
2617 struct iv_group *group = data->vgroups[i];
2618 struct iv_use *use = group->vuses[0];
2620 use->id = 0;
2621 use->group_id = group->id;
2622 if (group->vuses.length () == 1)
2623 continue;
2625 gcc_assert (group->type == USE_ADDRESS);
2627 for (j = 1; j < group->vuses.length ();)
2629 struct iv_use *next = group->vuses[j];
2630 HOST_WIDE_INT offset = next->addr_offset - use->addr_offset;
2632 /* Split the group if asked to, or if the offset against the first
2633 use can't fit in the offset part of the addressing mode. IV uses
2634 having the same offset are still kept in one group. */
2635 if (offset != 0
2636 && (split_p || !addr_offset_valid_p (use, offset)))
2638 if (!new_group)
2639 new_group = record_group (data, group->type);
2640 group->vuses.ordered_remove (j);
2641 new_group->vuses.safe_push (next);
2642 continue;
2645 next->id = j;
2646 next->group_id = group->id;
2647 j++;
2652 /* Finds uses of the induction variables that are interesting. */
2654 static void
2655 find_interesting_uses (struct ivopts_data *data)
2657 basic_block bb;
2658 gimple_stmt_iterator bsi;
2659 basic_block *body = get_loop_body (data->current_loop);
2660 unsigned i;
2661 edge e;
2663 for (i = 0; i < data->current_loop->num_nodes; i++)
2665 edge_iterator ei;
2666 bb = body[i];
2668 FOR_EACH_EDGE (e, ei, bb->succs)
2669 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2670 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2671 find_interesting_uses_outside (data, e);
2673 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2674 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2675 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2676 if (!is_gimple_debug (gsi_stmt (bsi)))
2677 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2679 free (body);
2681 split_address_groups (data);
2683 if (dump_file && (dump_flags & TDF_DETAILS))
2685 fprintf (dump_file, "\n<IV Groups>:\n");
2686 dump_groups (dump_file, data);
2687 fprintf (dump_file, "\n");
2691 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2692 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2693 we are at the top-level of the processed address. */
2695 static tree
2696 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2697 HOST_WIDE_INT *offset)
2699 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2700 enum tree_code code;
2701 tree type, orig_type = TREE_TYPE (expr);
2702 HOST_WIDE_INT off0, off1, st;
2703 tree orig_expr = expr;
2705 STRIP_NOPS (expr);
2707 type = TREE_TYPE (expr);
2708 code = TREE_CODE (expr);
2709 *offset = 0;
2711 switch (code)
2713 case INTEGER_CST:
2714 if (!cst_and_fits_in_hwi (expr)
2715 || integer_zerop (expr))
2716 return orig_expr;
2718 *offset = int_cst_value (expr);
2719 return build_int_cst (orig_type, 0);
2721 case POINTER_PLUS_EXPR:
2722 case PLUS_EXPR:
2723 case MINUS_EXPR:
2724 op0 = TREE_OPERAND (expr, 0);
2725 op1 = TREE_OPERAND (expr, 1);
2727 op0 = strip_offset_1 (op0, false, false, &off0);
2728 op1 = strip_offset_1 (op1, false, false, &off1);
2730 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2731 if (op0 == TREE_OPERAND (expr, 0)
2732 && op1 == TREE_OPERAND (expr, 1))
2733 return orig_expr;
2735 if (integer_zerop (op1))
2736 expr = op0;
2737 else if (integer_zerop (op0))
2739 if (code == MINUS_EXPR)
2740 expr = fold_build1 (NEGATE_EXPR, type, op1);
2741 else
2742 expr = op1;
2744 else
2745 expr = fold_build2 (code, type, op0, op1);
2747 return fold_convert (orig_type, expr);
2749 case MULT_EXPR:
2750 op1 = TREE_OPERAND (expr, 1);
2751 if (!cst_and_fits_in_hwi (op1))
2752 return orig_expr;
2754 op0 = TREE_OPERAND (expr, 0);
2755 op0 = strip_offset_1 (op0, false, false, &off0);
2756 if (op0 == TREE_OPERAND (expr, 0))
2757 return orig_expr;
2759 *offset = off0 * int_cst_value (op1);
2760 if (integer_zerop (op0))
2761 expr = op0;
2762 else
2763 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2765 return fold_convert (orig_type, expr);
2767 case ARRAY_REF:
2768 case ARRAY_RANGE_REF:
2769 if (!inside_addr)
2770 return orig_expr;
2772 step = array_ref_element_size (expr);
2773 if (!cst_and_fits_in_hwi (step))
2774 break;
2776 st = int_cst_value (step);
2777 op1 = TREE_OPERAND (expr, 1);
2778 op1 = strip_offset_1 (op1, false, false, &off1);
2779 *offset = off1 * st;
2781 if (top_compref
2782 && integer_zerop (op1))
2784 /* Strip the component reference completely. */
2785 op0 = TREE_OPERAND (expr, 0);
2786 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2787 *offset += off0;
2788 return op0;
2790 break;
2792 case COMPONENT_REF:
2794 tree field;
2796 if (!inside_addr)
2797 return orig_expr;
2799 tmp = component_ref_field_offset (expr);
2800 field = TREE_OPERAND (expr, 1);
2801 if (top_compref
2802 && cst_and_fits_in_hwi (tmp)
2803 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2805 HOST_WIDE_INT boffset, abs_off;
2807 /* Strip the component reference completely. */
2808 op0 = TREE_OPERAND (expr, 0);
2809 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2810 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2811 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2812 if (boffset < 0)
2813 abs_off = -abs_off;
2815 *offset = off0 + int_cst_value (tmp) + abs_off;
2816 return op0;
2819 break;
2821 case ADDR_EXPR:
2822 op0 = TREE_OPERAND (expr, 0);
2823 op0 = strip_offset_1 (op0, true, true, &off0);
2824 *offset += off0;
2826 if (op0 == TREE_OPERAND (expr, 0))
2827 return orig_expr;
2829 expr = build_fold_addr_expr (op0);
2830 return fold_convert (orig_type, expr);
2832 case MEM_REF:
2833 /* ??? Offset operand? */
2834 inside_addr = false;
2835 break;
2837 default:
2838 return orig_expr;
2841 /* Default handling of expressions for which we want to recurse into
2842 the first operand. */
2843 op0 = TREE_OPERAND (expr, 0);
2844 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2845 *offset += off0;
2847 if (op0 == TREE_OPERAND (expr, 0)
2848 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2849 return orig_expr;
2851 expr = copy_node (expr);
2852 TREE_OPERAND (expr, 0) = op0;
2853 if (op1)
2854 TREE_OPERAND (expr, 1) = op1;
2856 /* Inside address, we might strip the top level component references,
2857 thus changing type of the expression. Handling of ADDR_EXPR
2858 will fix that. */
2859 expr = fold_convert (orig_type, expr);
2861 return expr;
2864 /* Strips constant offsets from EXPR and stores them to OFFSET. */
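/* For instance, for EXPR = ptr_1 + 24 this returns ptr_1 and stores 24 to
   *OFFSET; if no constant offset can be stripped, EXPR itself is returned
   and *OFFSET is set to 0. */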
2866 static tree
2867 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2869 HOST_WIDE_INT off;
2870 tree core = strip_offset_1 (expr, false, false, &off);
2871 *offset = off;
2872 return core;
2875 /* Returns variant of TYPE that can be used as base for different uses.
2876 We return unsigned type with the same precision, which avoids problems
2877 with overflows. */
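/* E.g. for "int" this returns "unsigned int"; for a pointer type it
   returns an unsigned integer type of the same precision, so that
   overflow in the new induction variables is well defined. */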
2879 static tree
2880 generic_type_for (tree type)
2882 if (POINTER_TYPE_P (type))
2883 return unsigned_type_for (type);
2885 if (TYPE_UNSIGNED (type))
2886 return type;
2888 return unsigned_type_for (type);
2891 /* Private data for walk_tree. */
2893 struct walk_tree_data
2895 bitmap *inv_vars;
2896 struct ivopts_data *idata;
2899 /* Callback function for walk_tree, it records invariants and symbol
2900 reference in *EXPR_P. DATA is the structure storing result info. */
2902 static tree
2903 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2905 tree op = *expr_p;
2906 struct version_info *info;
2907 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2909 if (TREE_CODE (op) != SSA_NAME)
2910 return NULL_TREE;
2912 info = name_info (wdata->idata, op);
2913 /* Because we expand simple operations when finding IVs, a loop invariant
2914 variable that isn't referred to by the original loop could be used now.
2915 Record such invariant variables here. */
2916 if (!info->iv)
2918 struct ivopts_data *idata = wdata->idata;
2919 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2921 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2923 set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
2924 record_invariant (idata, op, false);
2927 if (!info->inv_id || info->has_nonlin_use)
2928 return NULL_TREE;
2930 if (!*wdata->inv_vars)
2931 *wdata->inv_vars = BITMAP_ALLOC (NULL);
2932 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
2934 return NULL_TREE;
2937 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we should
2938 store them. */
2940 static inline void
2941 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
2943 struct walk_tree_data wdata;
2945 if (!inv_vars)
2946 return;
2948 wdata.idata = data;
2949 wdata.inv_vars = inv_vars;
2950 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
2953 /* Get entry from invariant expr hash table for INV_EXPR. New entry
2954 will be recorded if it doesn't exist yet. Given the two exprs below:
2955 inv_expr + cst1, inv_expr + cst2
2956 It's hard to decide whether the constant part should be stripped
2957 or not. We choose not to strip it, based on the facts below:
2958 1) We need to count the ADD cost for the constant part if it's stripped,
2959 which isn't always trivial where this function is called.
2960 2) Stripping the constant away may conflict with the following loop
2961 invariant hoisting pass.
2962 3) Not stripping the constant results in more invariant exprs,
2963 which usually leads to decisions preferring lower register pressure. */
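/* E.g. given the two expressions above, both "inv_expr + cst1" and
   "inv_expr + cst2" get their own entry (and id) in the hash table
   instead of sharing a single stripped "inv_expr" entry. */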
2965 static iv_inv_expr_ent *
2966 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
2968 STRIP_NOPS (inv_expr);
2970 if (TREE_CODE (inv_expr) == INTEGER_CST || TREE_CODE (inv_expr) == SSA_NAME)
2971 return NULL;
2973 /* Don't strip constant part away as we used to. */
2975 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
2976 struct iv_inv_expr_ent ent;
2977 ent.expr = inv_expr;
2978 ent.hash = iterative_hash_expr (inv_expr, 0);
2979 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
2981 if (!*slot)
2983 *slot = XNEW (struct iv_inv_expr_ent);
2984 (*slot)->expr = inv_expr;
2985 (*slot)->hash = ent.hash;
2986 (*slot)->id = ++data->max_inv_expr_id;
2989 return *slot;
2992 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2993 position to POS. If USE is not NULL, the candidate is set as related to
2994 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2995 replacement of the final value of the iv by a direct computation. */
2997 static struct iv_cand *
2998 add_candidate_1 (struct ivopts_data *data,
2999 tree base, tree step, bool important, enum iv_position pos,
3000 struct iv_use *use, gimple *incremented_at,
3001 struct iv *orig_iv = NULL)
3003 unsigned i;
3004 struct iv_cand *cand = NULL;
3005 tree type, orig_type;
3007 gcc_assert (base && step);
3009 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3010 live, but the ivopts code may replace a real pointer with one
3011 pointing before or after the memory block that is then adjusted
3012 into the memory block during the loop. FIXME: It would likely be
3013 better to actually force the pointer live and still use ivopts;
3014 for example, it would be enough to write the pointer into memory
3015 and keep it there until after the loop. */
3016 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3017 return NULL;
3019 /* For non-original variables, make sure their values are computed in a type
3020 that does not invoke undefined behavior on overflows (since in general,
3021 we cannot prove that these induction variables are non-wrapping). */
3022 if (pos != IP_ORIGINAL)
3024 orig_type = TREE_TYPE (base);
3025 type = generic_type_for (orig_type);
3026 if (type != orig_type)
3028 base = fold_convert (type, base);
3029 step = fold_convert (type, step);
3033 for (i = 0; i < data->vcands.length (); i++)
3035 cand = data->vcands[i];
3037 if (cand->pos != pos)
3038 continue;
3040 if (cand->incremented_at != incremented_at
3041 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3042 && cand->ainc_use != use))
3043 continue;
3045 if (operand_equal_p (base, cand->iv->base, 0)
3046 && operand_equal_p (step, cand->iv->step, 0)
3047 && (TYPE_PRECISION (TREE_TYPE (base))
3048 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3049 break;
3052 if (i == data->vcands.length ())
3054 cand = XCNEW (struct iv_cand);
3055 cand->id = i;
3056 cand->iv = alloc_iv (data, base, step);
3057 cand->pos = pos;
3058 if (pos != IP_ORIGINAL)
3060 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3061 cand->var_after = cand->var_before;
3063 cand->important = important;
3064 cand->incremented_at = incremented_at;
3065 data->vcands.safe_push (cand);
3067 if (TREE_CODE (step) != INTEGER_CST)
3069 find_inv_vars (data, &step, &cand->inv_vars);
3071 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3072 /* Share bitmap between inv_vars and inv_exprs for cand. */
3073 if (inv_expr != NULL)
3075 cand->inv_exprs = cand->inv_vars;
3076 cand->inv_vars = NULL;
3077 if (cand->inv_exprs)
3078 bitmap_clear (cand->inv_exprs);
3079 else
3080 cand->inv_exprs = BITMAP_ALLOC (NULL);
3082 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3086 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3087 cand->ainc_use = use;
3088 else
3089 cand->ainc_use = NULL;
3091 cand->orig_iv = orig_iv;
3092 if (dump_file && (dump_flags & TDF_DETAILS))
3093 dump_cand (dump_file, cand);
3096 cand->important |= important;
3098 /* Relate candidate to the group for which it is added. */
3099 if (use)
3100 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3102 return cand;
3105 /* Returns true if incrementing the induction variable at the end of the LOOP
3106 is allowed.
3108 The purpose is to avoid splitting the latch edge with a biv increment, thus
3109 creating a jump, possibly confusing other optimization passes and leaving
3110 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3111 available (so we do not have a better alternative), or if the latch edge
3112 is already nonempty. */
3114 static bool
3115 allow_ip_end_pos_p (struct loop *loop)
3117 if (!ip_normal_pos (loop))
3118 return true;
3120 if (!empty_block_p (ip_end_pos (loop)))
3121 return true;
3123 return false;
3126 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3127 Important field is set to IMPORTANT. */
3129 static void
3130 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3131 bool important, struct iv_use *use)
3133 basic_block use_bb = gimple_bb (use->stmt);
3134 machine_mode mem_mode;
3135 unsigned HOST_WIDE_INT cstepi;
3137 /* If we insert the increment in any position other than the standard
3138 ones, we must ensure that it is incremented once per iteration.
3139 It must not be in an inner nested loop, or one side of an if
3140 statement. */
3141 if (use_bb->loop_father != data->current_loop
3142 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3143 || stmt_could_throw_p (use->stmt)
3144 || !cst_and_fits_in_hwi (step))
3145 return;
3147 cstepi = int_cst_value (step);
3149 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
3150 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3151 || USE_STORE_PRE_INCREMENT (mem_mode))
3152 && GET_MODE_SIZE (mem_mode) == cstepi)
3153 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3154 || USE_STORE_PRE_DECREMENT (mem_mode))
3155 && GET_MODE_SIZE (mem_mode) == -cstepi))
3157 enum tree_code code = MINUS_EXPR;
3158 tree new_base;
3159 tree new_step = step;
3161 if (POINTER_TYPE_P (TREE_TYPE (base)))
3163 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3164 code = POINTER_PLUS_EXPR;
3166 else
3167 new_step = fold_convert (TREE_TYPE (base), new_step);
3168 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3169 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3170 use->stmt);
3172 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3173 || USE_STORE_POST_INCREMENT (mem_mode))
3174 && GET_MODE_SIZE (mem_mode) == cstepi)
3175 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3176 || USE_STORE_POST_DECREMENT (mem_mode))
3177 && GET_MODE_SIZE (mem_mode) == -cstepi))
3179 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3180 use->stmt);
3184 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3185 position to POS. If USE is not NULL, the candidate is set as related to
3186 it. The candidate computation is scheduled before exit condition and at
3187 the end of loop. */
3189 static void
3190 add_candidate (struct ivopts_data *data,
3191 tree base, tree step, bool important, struct iv_use *use,
3192 struct iv *orig_iv = NULL)
3194 if (ip_normal_pos (data->current_loop))
3195 add_candidate_1 (data, base, step, important,
3196 IP_NORMAL, use, NULL, orig_iv);
3197 if (ip_end_pos (data->current_loop)
3198 && allow_ip_end_pos_p (data->current_loop))
3199 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3202 /* Adds standard iv candidates. */
3204 static void
3205 add_standard_iv_candidates (struct ivopts_data *data)
3207 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3209 /* The same for a double-integer type if it is still fast enough. */
3210 if (TYPE_PRECISION
3211 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3212 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3213 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3214 build_int_cst (long_integer_type_node, 1), true, NULL);
3216 /* The same for a double-integer type if it is still fast enough. */
3217 if (TYPE_PRECISION
3218 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3219 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3220 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3221 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3225 /* Adds candidates based on the old induction variable IV. */
3227 static void
3228 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3230 gimple *phi;
3231 tree def;
3232 struct iv_cand *cand;
3234 /* Check if this biv is used in address type use. */
3235 if (iv->no_overflow && iv->have_address_use
3236 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3237 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3239 tree base = fold_convert (sizetype, iv->base);
3240 tree step = fold_convert (sizetype, iv->step);
3242 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3243 add_candidate (data, base, step, true, NULL, iv);
3244 /* Add iv cand of the original type only if it has nonlinear use. */
3245 if (iv->nonlin_use)
3246 add_candidate (data, iv->base, iv->step, true, NULL);
3248 else
3249 add_candidate (data, iv->base, iv->step, true, NULL);
3251 /* The same, but with initial value zero. */
3252 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3253 add_candidate (data, size_int (0), iv->step, true, NULL);
3254 else
3255 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3256 iv->step, true, NULL);
3258 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3259 if (gimple_code (phi) == GIMPLE_PHI)
3261 /* Additionally record the possibility of leaving the original iv
3262 untouched. */
3263 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3264 /* Don't add candidate if it's from another PHI node because
3265 it's an affine iv appearing in the form of PEELED_CHREC. */
3266 phi = SSA_NAME_DEF_STMT (def);
3267 if (gimple_code (phi) != GIMPLE_PHI)
3269 cand = add_candidate_1 (data,
3270 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3271 SSA_NAME_DEF_STMT (def));
3272 if (cand)
3274 cand->var_before = iv->ssa_name;
3275 cand->var_after = def;
3278 else
3279 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3283 /* Adds candidates based on the old induction variables. */
3285 static void
3286 add_iv_candidate_for_bivs (struct ivopts_data *data)
3288 unsigned i;
3289 struct iv *iv;
3290 bitmap_iterator bi;
3292 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3294 iv = ver_info (data, i)->iv;
3295 if (iv && iv->biv_p && !integer_zerop (iv->step))
3296 add_iv_candidate_for_biv (data, iv);
3300 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3302 static void
3303 record_common_cand (struct ivopts_data *data, tree base,
3304 tree step, struct iv_use *use)
3306 struct iv_common_cand ent;
3307 struct iv_common_cand **slot;
3309 ent.base = base;
3310 ent.step = step;
3311 ent.hash = iterative_hash_expr (base, 0);
3312 ent.hash = iterative_hash_expr (step, ent.hash);
3314 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3315 if (*slot == NULL)
3317 *slot = new iv_common_cand ();
3318 (*slot)->base = base;
3319 (*slot)->step = step;
3320 (*slot)->uses.create (8);
3321 (*slot)->hash = ent.hash;
3322 data->iv_common_cands.safe_push ((*slot));
3325 gcc_assert (use != NULL);
3326 (*slot)->uses.safe_push (use);
3327 return;
3330 /* Comparison function used to sort common candidates. */
3332 static int
3333 common_cand_cmp (const void *p1, const void *p2)
3335 unsigned n1, n2;
3336 const struct iv_common_cand *const *const ccand1
3337 = (const struct iv_common_cand *const *)p1;
3338 const struct iv_common_cand *const *const ccand2
3339 = (const struct iv_common_cand *const *)p2;
3341 n1 = (*ccand1)->uses.length ();
3342 n2 = (*ccand2)->uses.length ();
3343 return n2 - n1;
3346 /* Adds IV candidates based on the common candidates recorded. */
3348 static void
3349 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3351 unsigned i, j;
3352 struct iv_cand *cand_1, *cand_2;
3354 data->iv_common_cands.qsort (common_cand_cmp);
3355 for (i = 0; i < data->iv_common_cands.length (); i++)
3357 struct iv_common_cand *ptr = data->iv_common_cands[i];
3359 /* Only add IV candidate if it's derived from multiple uses. */
3360 if (ptr->uses.length () <= 1)
3361 break;
3363 cand_1 = NULL;
3364 cand_2 = NULL;
3365 if (ip_normal_pos (data->current_loop))
3366 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3367 false, IP_NORMAL, NULL, NULL);
3369 if (ip_end_pos (data->current_loop)
3370 && allow_ip_end_pos_p (data->current_loop))
3371 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3372 false, IP_END, NULL, NULL);
3374 /* Bind deriving uses and the new candidates. */
3375 for (j = 0; j < ptr->uses.length (); j++)
3377 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3378 if (cand_1)
3379 bitmap_set_bit (group->related_cands, cand_1->id);
3380 if (cand_2)
3381 bitmap_set_bit (group->related_cands, cand_2->id);
3385 /* Release data since it is useless from this point. */
3386 data->iv_common_cand_tab->empty ();
3387 data->iv_common_cands.truncate (0);
3390 /* Adds candidates based on the value of USE's iv. */
3392 static void
3393 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3395 unsigned HOST_WIDE_INT offset;
3396 tree base;
3397 tree basetype;
3398 struct iv *iv = use->iv;
3400 add_candidate (data, iv->base, iv->step, false, use);
3402 /* Record common candidate for use in case it can be shared by others. */
3403 record_common_cand (data, iv->base, iv->step, use);
3405 /* Record common candidate with initial value zero. */
3406 basetype = TREE_TYPE (iv->base);
3407 if (POINTER_TYPE_P (basetype))
3408 basetype = sizetype;
3409 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3411 /* Record common candidate with constant offset stripped in base.
3412 Like the use itself, we also add candidate directly for it. */
3413 base = strip_offset (iv->base, &offset);
3414 if (offset || base != iv->base)
3416 record_common_cand (data, base, iv->step, use);
3417 add_candidate (data, base, iv->step, false, use);
3420 /* Record common candidate with base_object removed in base. */
3421 base = iv->base;
3422 STRIP_NOPS (base);
3423 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3425 tree step = iv->step;
3427 STRIP_NOPS (step);
3428 base = TREE_OPERAND (base, 1);
3429 step = fold_convert (sizetype, step);
3430 record_common_cand (data, base, step, use);
3431 /* Also record common candidate with offset stripped. */
3432 base = strip_offset (base, &offset);
3433 if (offset)
3434 record_common_cand (data, base, step, use);
3437 /* Finally, add auto-increment candidates. Make such variables
3438 important since other iv uses with the same base object may be based
3439 on them. */
3440 if (use != NULL && use->type == USE_ADDRESS)
3441 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3444 /* Adds candidates based on the uses. */
3446 static void
3447 add_iv_candidate_for_groups (struct ivopts_data *data)
3449 unsigned i;
3451 /* Only add candidates for the first use in each group. */
3452 for (i = 0; i < data->vgroups.length (); i++)
3454 struct iv_group *group = data->vgroups[i];
3456 gcc_assert (group->vuses[0] != NULL);
3457 add_iv_candidate_for_use (data, group->vuses[0]);
3459 add_iv_candidate_derived_from_uses (data);
3462 /* Record important candidates and add them to related_cands bitmaps. */
3464 static void
3465 record_important_candidates (struct ivopts_data *data)
3467 unsigned i;
3468 struct iv_group *group;
3470 for (i = 0; i < data->vcands.length (); i++)
3472 struct iv_cand *cand = data->vcands[i];
3474 if (cand->important)
3475 bitmap_set_bit (data->important_candidates, i);
3478 data->consider_all_candidates = (data->vcands.length ()
3479 <= CONSIDER_ALL_CANDIDATES_BOUND);
3481 /* Add important candidates to groups' related_cands bitmaps. */
3482 for (i = 0; i < data->vgroups.length (); i++)
3484 group = data->vgroups[i];
3485 bitmap_ior_into (group->related_cands, data->important_candidates);
3489 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3490 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3491 we allocate a simple list for every use. */
3493 static void
3494 alloc_use_cost_map (struct ivopts_data *data)
3496 unsigned i, size, s;
3498 for (i = 0; i < data->vgroups.length (); i++)
3500 struct iv_group *group = data->vgroups[i];
3502 if (data->consider_all_candidates)
3503 size = data->vcands.length ();
3504 else
3506 s = bitmap_count_bits (group->related_cands);
3508 /* Round up to the nearest power of two, so that taking the modulo by it is fast. */
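/* For instance, 5 related candidates yield a map of 8 entries;
   set_group_iv_cost and get_group_iv_cost then index it with
   "cand->id & (n_map_members - 1)". */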
3509 size = s ? (1 << ceil_log2 (s)) : 1;
3512 group->n_map_members = size;
3513 group->cost_map = XCNEWVEC (struct cost_pair, size);
3517 /* Sets the cost of the (GROUP, CAND) pair to COST and records that it depends
3518 on invariants INV_VARS, that the value used in expressing it is
3519 VALUE, and that in case of iv elimination the comparison operator is COMP. */
3521 static void
3522 set_group_iv_cost (struct ivopts_data *data,
3523 struct iv_group *group, struct iv_cand *cand,
3524 comp_cost cost, bitmap inv_vars, tree value,
3525 enum tree_code comp, bitmap inv_exprs)
3527 unsigned i, s;
3529 if (cost.infinite_cost_p ())
3531 BITMAP_FREE (inv_vars);
3532 BITMAP_FREE (inv_exprs);
3533 return;
3536 if (data->consider_all_candidates)
3538 group->cost_map[cand->id].cand = cand;
3539 group->cost_map[cand->id].cost = cost;
3540 group->cost_map[cand->id].inv_vars = inv_vars;
3541 group->cost_map[cand->id].inv_exprs = inv_exprs;
3542 group->cost_map[cand->id].value = value;
3543 group->cost_map[cand->id].comp = comp;
3544 return;
3547 /* n_map_members is a power of two, so this computes modulo. */
3548 s = cand->id & (group->n_map_members - 1);
3549 for (i = s; i < group->n_map_members; i++)
3550 if (!group->cost_map[i].cand)
3551 goto found;
3552 for (i = 0; i < s; i++)
3553 if (!group->cost_map[i].cand)
3554 goto found;
3556 gcc_unreachable ();
3558 found:
3559 group->cost_map[i].cand = cand;
3560 group->cost_map[i].cost = cost;
3561 group->cost_map[i].inv_vars = inv_vars;
3562 group->cost_map[i].inv_exprs = inv_exprs;
3563 group->cost_map[i].value = value;
3564 group->cost_map[i].comp = comp;
3567 /* Gets cost of (GROUP, CAND) pair. */
3569 static struct cost_pair *
3570 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3571 struct iv_cand *cand)
3573 unsigned i, s;
3574 struct cost_pair *ret;
3576 if (!cand)
3577 return NULL;
3579 if (data->consider_all_candidates)
3581 ret = group->cost_map + cand->id;
3582 if (!ret->cand)
3583 return NULL;
3585 return ret;
3588 /* n_map_members is a power of two, so this computes modulo. */
3589 s = cand->id & (group->n_map_members - 1);
3590 for (i = s; i < group->n_map_members; i++)
3591 if (group->cost_map[i].cand == cand)
3592 return group->cost_map + i;
3593 else if (group->cost_map[i].cand == NULL)
3594 return NULL;
3595 for (i = 0; i < s; i++)
3596 if (group->cost_map[i].cand == cand)
3597 return group->cost_map + i;
3598 else if (group->cost_map[i].cand == NULL)
3599 return NULL;
3601 return NULL;
3604 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3605 static rtx
3606 produce_memory_decl_rtl (tree obj, int *regno)
3608 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3609 machine_mode address_mode = targetm.addr_space.address_mode (as);
3610 rtx x;
3612 gcc_assert (obj);
3613 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3615 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3616 x = gen_rtx_SYMBOL_REF (address_mode, name);
3617 SET_SYMBOL_REF_DECL (x, obj);
3618 x = gen_rtx_MEM (DECL_MODE (obj), x);
3619 set_mem_addr_space (x, as);
3620 targetm.encode_section_info (obj, x, true);
3622 else
3624 x = gen_raw_REG (address_mode, (*regno)++);
3625 x = gen_rtx_MEM (DECL_MODE (obj), x);
3626 set_mem_addr_space (x, as);
3629 return x;
3632 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3633 walk_tree. DATA contains the actual fake register number. */
3635 static tree
3636 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3638 tree obj = NULL_TREE;
3639 rtx x = NULL_RTX;
3640 int *regno = (int *) data;
3642 switch (TREE_CODE (*expr_p))
3644 case ADDR_EXPR:
3645 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3646 handled_component_p (*expr_p);
3647 expr_p = &TREE_OPERAND (*expr_p, 0))
3648 continue;
3649 obj = *expr_p;
3650 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3651 x = produce_memory_decl_rtl (obj, regno);
3652 break;
3654 case SSA_NAME:
3655 *ws = 0;
3656 obj = SSA_NAME_VAR (*expr_p);
3657 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3658 if (!obj)
3659 return NULL_TREE;
3660 if (!DECL_RTL_SET_P (obj))
3661 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3662 break;
3664 case VAR_DECL:
3665 case PARM_DECL:
3666 case RESULT_DECL:
3667 *ws = 0;
3668 obj = *expr_p;
3670 if (DECL_RTL_SET_P (obj))
3671 break;
3673 if (DECL_MODE (obj) == BLKmode)
3674 x = produce_memory_decl_rtl (obj, regno);
3675 else
3676 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3678 break;
3680 default:
3681 break;
3684 if (x)
3686 decl_rtl_to_reset.safe_push (obj);
3687 SET_DECL_RTL (obj, x);
3690 return NULL_TREE;
3693 /* Determines cost of the computation of EXPR. */
3695 static unsigned
3696 computation_cost (tree expr, bool speed)
3698 rtx_insn *seq;
3699 rtx rslt;
3700 tree type = TREE_TYPE (expr);
3701 unsigned cost;
3702 /* Avoid using hard regs in ways which may be unsupported. */
3703 int regno = LAST_VIRTUAL_REGISTER + 1;
3704 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3705 enum node_frequency real_frequency = node->frequency;
3707 node->frequency = NODE_FREQUENCY_NORMAL;
3708 crtl->maybe_hot_insn_p = speed;
3709 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3710 start_sequence ();
3711 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3712 seq = get_insns ();
3713 end_sequence ();
3714 default_rtl_profile ();
3715 node->frequency = real_frequency;
3717 cost = seq_cost (seq, speed);
3718 if (MEM_P (rslt))
3719 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3720 TYPE_ADDR_SPACE (type), speed);
3721 else if (!REG_P (rslt))
3722 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3724 return cost;
3727 /* Returns the variable containing the value of candidate CAND at statement STMT. */
3729 static tree
3730 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3732 if (stmt_after_increment (loop, cand, stmt))
3733 return cand->var_after;
3734 else
3735 return cand->var_before;
3738 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3739 same precision that is at least as wide as the precision of TYPE, stores
3740 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3741 type of A and B. */
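/* For instance, if *A is (unsigned int) a_1 and *B is (unsigned int) b_2
   where a_1 and b_2 both have type "unsigned long", *A becomes a_1,
   *B becomes b_2 and the "unsigned long" type is returned. */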
3743 static tree
3744 determine_common_wider_type (tree *a, tree *b)
3746 tree wider_type = NULL;
3747 tree suba, subb;
3748 tree atype = TREE_TYPE (*a);
3750 if (CONVERT_EXPR_P (*a))
3752 suba = TREE_OPERAND (*a, 0);
3753 wider_type = TREE_TYPE (suba);
3754 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3755 return atype;
3757 else
3758 return atype;
3760 if (CONVERT_EXPR_P (*b))
3762 subb = TREE_OPERAND (*b, 0);
3763 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3764 return atype;
3766 else
3767 return atype;
3769 *a = suba;
3770 *b = subb;
3771 return wider_type;
3774 /* Determines the expression by which USE is expressed from induction variable
3775 CAND at statement AT in LOOP. The expression is stored in two parts in a
3776 decomposed form. The invariant part is stored in AFF_INV; the variant
3777 part in AFF_VAR. Store the ratio of USE.step over CAND.step in PRAT if it's
3778 non-null. Returns false if USE cannot be expressed using CAND. */
3780 static bool
3781 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3782 struct iv_cand *cand, struct aff_tree *aff_inv,
3783 struct aff_tree *aff_var, widest_int *prat = NULL)
3785 tree ubase = use->iv->base, ustep = use->iv->step;
3786 tree cbase = cand->iv->base, cstep = cand->iv->step;
3787 tree common_type, uutype, var, cstep_common;
3788 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3789 aff_tree aff_cbase;
3790 widest_int rat;
3792 /* We must have a precision to express the values of use. */
3793 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3794 return false;
3796 var = var_at_stmt (loop, cand, at);
3797 uutype = unsigned_type_for (utype);
3799 /* If the conversion is not noop, perform it. */
3800 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3802 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3803 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3805 tree inner_base, inner_step, inner_type;
3806 inner_base = TREE_OPERAND (cbase, 0);
3807 if (CONVERT_EXPR_P (cstep))
3808 inner_step = TREE_OPERAND (cstep, 0);
3809 else
3810 inner_step = cstep;
3812 inner_type = TREE_TYPE (inner_base);
3813 /* If candidate is added from a biv whose type is smaller than
3814 ctype, we know both candidate and the biv won't overflow.
3815 In this case, it's safe to skip the conversion in the candidate.
3816 As an example, (unsigned short)((unsigned long)A) equals
3817 (unsigned short)A, if A has a type no larger than short. */
3818 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3820 cbase = inner_base;
3821 cstep = inner_step;
3824 cbase = fold_convert (uutype, cbase);
3825 cstep = fold_convert (uutype, cstep);
3826 var = fold_convert (uutype, var);
3829 /* Ratio is 1 when computing the value of biv cand by itself.
3830 We can't rely on constant_multiple_of in this case because the
3831 use is created after the original biv is selected. The call
3832 could fail because of inconsistent fold behavior. See PR68021
3833 for more information. */
3834 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3836 gcc_assert (is_gimple_assign (use->stmt));
3837 gcc_assert (use->iv->ssa_name == cand->var_after);
3838 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3839 rat = 1;
3841 else if (!constant_multiple_of (ustep, cstep, &rat))
3842 return false;
3844 if (prat)
3845 *prat = rat;
3847 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3848 type, we achieve better folding by computing their difference in this
3849 wider type and casting the result to UUTYPE. We do not need to worry about
3850 overflows, as all the arithmetic will in the end be performed in UUTYPE
3851 anyway. */
3852 common_type = determine_common_wider_type (&ubase, &cbase);
3854 /* use = ubase - ratio * cbase + ratio * var. */
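/* This follows from use = ubase + X * ustep and var = cbase + X * cstep
   at the same iteration count X: with ustep = ratio * cstep we get
   use = ubase + ratio * (var - cbase). */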
3855 tree_to_aff_combination (ubase, common_type, aff_inv);
3856 tree_to_aff_combination (cbase, common_type, &aff_cbase);
3857 tree_to_aff_combination (var, uutype, aff_var);
3859 /* We need to shift the value if we are after the increment. */
3860 if (stmt_after_increment (loop, cand, at))
3862 aff_tree cstep_aff;
3864 if (common_type != uutype)
3865 cstep_common = fold_convert (common_type, cstep);
3866 else
3867 cstep_common = cstep;
3869 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3870 aff_combination_add (&aff_cbase, &cstep_aff);
3873 aff_combination_scale (&aff_cbase, -rat);
3874 aff_combination_add (aff_inv, &aff_cbase);
3875 if (common_type != uutype)
3876 aff_combination_convert (aff_inv, uutype);
3878 aff_combination_scale (aff_var, rat);
3879 return true;
3882 /* Determines the expression by which USE is expressed from induction variable
3883 CAND at statement AT in LOOP. The expression is stored in a decomposed
3884 form into AFF. Returns false if USE cannot be expressed using CAND. */
3886 static bool
3887 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3888 struct iv_cand *cand, struct aff_tree *aff)
3890 aff_tree aff_var;
3892 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3893 return false;
3895 aff_combination_add (aff, &aff_var);
3896 return true;
3899 /* Return the type of USE. */
3901 static tree
3902 get_use_type (struct iv_use *use)
3904 tree base_type = TREE_TYPE (use->iv->base);
3905 tree type;
3907 if (use->type == USE_ADDRESS)
3909 /* The base_type may be a void pointer. Create a pointer type based on
3910 the mem_ref instead. */
3911 type = build_pointer_type (TREE_TYPE (*use->op_p));
3912 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3913 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3915 else
3916 type = base_type;
3918 return type;
3921 /* Determines the expression by which USE is expressed from induction variable
3922 CAND at statement AT in LOOP. The computation is unshared. */
3924 static tree
3925 get_computation_at (struct loop *loop, gimple *at,
3926 struct iv_use *use, struct iv_cand *cand)
3928 aff_tree aff;
3929 tree type = get_use_type (use);
3931 if (!get_computation_aff (loop, at, use, cand, &aff))
3932 return NULL_TREE;
3933 unshare_aff_combination (&aff);
3934 return fold_convert (type, aff_combination_to_tree (&aff));
3937 /* Adjust the cost COST for being in loop setup rather than loop body.
3938 If we're optimizing for space, the loop setup overhead is constant;
3939 if we're optimizing for speed, amortize it over the per-iteration cost.
3940 If ROUND_UP_P is true, the result is rounded up rather than truncated
3941 toward zero when optimizing for speed. */
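/* For instance, a setup cost of 10 in a loop iterating 4 times on average
   becomes 10 / 4 = 2 when optimizing for speed (3 with ROUND_UP_P), and
   stays 10 when optimizing for size. */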
3942 static unsigned
3943 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
3944 bool round_up_p = false)
3946 if (cost == INFTY)
3947 return cost;
3948 else if (optimize_loop_for_speed_p (data->current_loop))
3950 HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
3951 return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
3953 else
3954 return cost;
3957 /* Calculate the cost, for speed or size depending on SPEED, of shiftadd EXPR
3958 in MODE. MULT is the multiplication operand of EXPR that becomes the shift.
3959 COST0 and COST1 are the costs for calculating the operands of EXPR. Returns
3960 true if successful, and returns the cost in COST. */
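/* For instance, for EXPR = a_1 + b_2 * 4, MULT is "b_2 * 4" and m is 2, so
   the cost is the cheaper of a separate shift plus add and a single
   shift-and-add (shiftadd_cost) instruction, plus the cost of the
   non-multiplied operand. */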
3962 static bool
3963 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3964 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3966 comp_cost res;
3967 tree op1 = TREE_OPERAND (expr, 1);
3968 tree cst = TREE_OPERAND (mult, 1);
3969 tree multop = TREE_OPERAND (mult, 0);
3970 int m = exact_log2 (int_cst_value (cst));
3971 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3972 int as_cost, sa_cost;
3973 bool mult_in_op1;
3975 if (!(m >= 0 && m < maxm))
3976 return false;
3978 STRIP_NOPS (op1);
3979 mult_in_op1 = operand_equal_p (op1, mult, 0);
3981 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3983 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3984 use that in preference to a shift insn followed by an add insn. */
3985 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3986 ? shiftadd_cost (speed, mode, m)
3987 : (mult_in_op1
3988 ? shiftsub1_cost (speed, mode, m)
3989 : shiftsub0_cost (speed, mode, m)));
3991 res = comp_cost (MIN (as_cost, sa_cost), 0);
3992 res += (mult_in_op1 ? cost0 : cost1);
3994 STRIP_NOPS (multop);
3995 if (!is_gimple_val (multop))
3996 res += force_expr_to_var_cost (multop, speed);
3998 *cost = res;
3999 return true;
4002 /* Estimates cost of forcing expression EXPR into a variable. */
4004 static comp_cost
4005 force_expr_to_var_cost (tree expr, bool speed)
4007 static bool costs_initialized = false;
4008 static unsigned integer_cost [2];
4009 static unsigned symbol_cost [2];
4010 static unsigned address_cost [2];
4011 tree op0, op1;
4012 comp_cost cost0, cost1, cost;
4013 machine_mode mode;
4015 if (!costs_initialized)
4017 tree type = build_pointer_type (integer_type_node);
4018 tree var, addr;
4019 rtx x;
4020 int i;
4022 var = create_tmp_var_raw (integer_type_node, "test_var");
4023 TREE_STATIC (var) = 1;
4024 x = produce_memory_decl_rtl (var, NULL);
4025 SET_DECL_RTL (var, x);
4027 addr = build1 (ADDR_EXPR, type, var);
4030 for (i = 0; i < 2; i++)
4032 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4033 2000), i);
4035 symbol_cost[i] = computation_cost (addr, i) + 1;
4037 address_cost[i]
4038 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4039 if (dump_file && (dump_flags & TDF_DETAILS))
4041 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4042 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4043 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4044 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4045 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4046 fprintf (dump_file, "\n");
4050 costs_initialized = true;
4053 STRIP_NOPS (expr);
4055 if (SSA_VAR_P (expr))
4056 return no_cost;
4058 if (is_gimple_min_invariant (expr))
4060 if (TREE_CODE (expr) == INTEGER_CST)
4061 return comp_cost (integer_cost [speed], 0);
4063 if (TREE_CODE (expr) == ADDR_EXPR)
4065 tree obj = TREE_OPERAND (expr, 0);
4067 if (VAR_P (obj)
4068 || TREE_CODE (obj) == PARM_DECL
4069 || TREE_CODE (obj) == RESULT_DECL)
4070 return comp_cost (symbol_cost [speed], 0);
4073 return comp_cost (address_cost [speed], 0);
4076 switch (TREE_CODE (expr))
4078 case POINTER_PLUS_EXPR:
4079 case PLUS_EXPR:
4080 case MINUS_EXPR:
4081 case MULT_EXPR:
4082 case TRUNC_DIV_EXPR:
4083 case BIT_AND_EXPR:
4084 case BIT_IOR_EXPR:
4085 case LSHIFT_EXPR:
4086 case RSHIFT_EXPR:
4087 op0 = TREE_OPERAND (expr, 0);
4088 op1 = TREE_OPERAND (expr, 1);
4089 STRIP_NOPS (op0);
4090 STRIP_NOPS (op1);
4091 break;
4093 CASE_CONVERT:
4094 case NEGATE_EXPR:
4095 case BIT_NOT_EXPR:
4096 op0 = TREE_OPERAND (expr, 0);
4097 STRIP_NOPS (op0);
4098 op1 = NULL_TREE;
4099 break;
4101 default:
4102 /* Just an arbitrary value, FIXME. */
4103 return comp_cost (target_spill_cost[speed], 0);
4106 if (op0 == NULL_TREE
4107 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4108 cost0 = no_cost;
4109 else
4110 cost0 = force_expr_to_var_cost (op0, speed);
4112 if (op1 == NULL_TREE
4113 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4114 cost1 = no_cost;
4115 else
4116 cost1 = force_expr_to_var_cost (op1, speed);
4118 mode = TYPE_MODE (TREE_TYPE (expr));
4119 switch (TREE_CODE (expr))
4121 case POINTER_PLUS_EXPR:
4122 case PLUS_EXPR:
4123 case MINUS_EXPR:
4124 case NEGATE_EXPR:
4125 cost = comp_cost (add_cost (speed, mode), 0);
4126 if (TREE_CODE (expr) != NEGATE_EXPR)
4128 tree mult = NULL_TREE;
4129 comp_cost sa_cost;
4130 if (TREE_CODE (op1) == MULT_EXPR)
4131 mult = op1;
4132 else if (TREE_CODE (op0) == MULT_EXPR)
4133 mult = op0;
4135 if (mult != NULL_TREE
4136 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4137 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4138 speed, &sa_cost))
4139 return sa_cost;
4141 break;
4143 CASE_CONVERT:
4145 tree inner_mode, outer_mode;
4146 outer_mode = TREE_TYPE (expr);
4147 inner_mode = TREE_TYPE (op0);
4148 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4149 TYPE_MODE (inner_mode), speed), 0);
4151 break;
4153 case MULT_EXPR:
4154 if (cst_and_fits_in_hwi (op0))
4155 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4156 mode, speed), 0);
4157 else if (cst_and_fits_in_hwi (op1))
4158 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4159 mode, speed), 0);
4160 else
4161 return comp_cost (target_spill_cost [speed], 0);
4162 break;
4164 case TRUNC_DIV_EXPR:
4165 /* Division by a power of two is usually cheap, so we allow it. Forbid
4166 anything else. */
4167 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4168 cost = comp_cost (add_cost (speed, mode), 0);
4169 else
4170 cost = comp_cost (target_spill_cost[speed], 0);
4171 break;
4173 case BIT_AND_EXPR:
4174 case BIT_IOR_EXPR:
4175 case BIT_NOT_EXPR:
4176 case LSHIFT_EXPR:
4177 case RSHIFT_EXPR:
4178 cost = comp_cost (add_cost (speed, mode), 0);
4179 break;
4181 default:
4182 gcc_unreachable ();
4185 cost += cost0;
4186 cost += cost1;
4187 return cost;
4190 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4191 invariants the computation depends on. */
4193 static comp_cost
4194 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4196 if (!expr)
4197 return no_cost;
4199 find_inv_vars (data, &expr, inv_vars);
4200 return force_expr_to_var_cost (expr, data->speed);
4203 /* Returns cost of auto-modifying address expression in shape base + offset.
4204 AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4205 address expression. The address expression has ADDR_MODE in addr space
4206 AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4207 speed or size. */
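/* As an example, a use corresponding to the C idiom

     *p++ = x;

   with a 4-byte access, AINC_STEP == 4 and AINC_OFFSET == 0, would be
   matched to the AINC_POST_INC entry below, provided the target supports
   post-increment addressing for that mode.  */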
4209 enum ainc_type
4211 AINC_PRE_INC, /* Pre increment. */
4212 AINC_PRE_DEC, /* Pre decrement. */
4213 AINC_POST_INC, /* Post increment. */
4214 AINC_POST_DEC, /* Post decrement. */
4215 AINC_NONE /* Also the number of auto increment types. */
4218 struct ainc_cost_data
4220 unsigned costs[AINC_NONE];
4223 static comp_cost
4224 get_address_cost_ainc (HOST_WIDE_INT ainc_step, HOST_WIDE_INT ainc_offset,
4225 machine_mode addr_mode, machine_mode mem_mode,
4226 addr_space_t as, bool speed)
4228 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4229 && !USE_STORE_PRE_DECREMENT (mem_mode)
4230 && !USE_LOAD_POST_DECREMENT (mem_mode)
4231 && !USE_STORE_POST_DECREMENT (mem_mode)
4232 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4233 && !USE_STORE_PRE_INCREMENT (mem_mode)
4234 && !USE_LOAD_POST_INCREMENT (mem_mode)
4235 && !USE_STORE_POST_INCREMENT (mem_mode))
4236 return infinite_cost;
4238 static vec<ainc_cost_data *> ainc_cost_data_list;
4239 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4240 if (idx >= ainc_cost_data_list.length ())
4242 unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4244 gcc_assert (nsize > idx);
4245 ainc_cost_data_list.safe_grow_cleared (nsize);
4248 ainc_cost_data *data = ainc_cost_data_list[idx];
4249 if (data == NULL)
4251 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4253 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4254 data->costs[AINC_PRE_DEC] = INFTY;
4255 data->costs[AINC_POST_DEC] = INFTY;
4256 data->costs[AINC_PRE_INC] = INFTY;
4257 data->costs[AINC_POST_INC] = INFTY;
4258 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4259 || USE_STORE_PRE_DECREMENT (mem_mode))
4261 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4263 if (memory_address_addr_space_p (mem_mode, addr, as))
4264 data->costs[AINC_PRE_DEC]
4265 = address_cost (addr, mem_mode, as, speed);
4267 if (USE_LOAD_POST_DECREMENT (mem_mode)
4268 || USE_STORE_POST_DECREMENT (mem_mode))
4270 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4272 if (memory_address_addr_space_p (mem_mode, addr, as))
4273 data->costs[AINC_POST_DEC]
4274 = address_cost (addr, mem_mode, as, speed);
4276 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4277 || USE_STORE_PRE_INCREMENT (mem_mode))
4279 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4281 if (memory_address_addr_space_p (mem_mode, addr, as))
4282 data->costs[AINC_PRE_INC]
4283 = address_cost (addr, mem_mode, as, speed);
4285 if (USE_LOAD_POST_INCREMENT (mem_mode)
4286 || USE_STORE_POST_INCREMENT (mem_mode))
4288 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4290 if (memory_address_addr_space_p (mem_mode, addr, as))
4291 data->costs[AINC_POST_INC]
4292 = address_cost (addr, mem_mode, as, speed);
4294 ainc_cost_data_list[idx] = data;
4297 HOST_WIDE_INT msize = GET_MODE_SIZE (mem_mode);
4298 if (ainc_offset == 0 && msize == ainc_step)
4299 return comp_cost (data->costs[AINC_POST_INC], 0);
4300 if (ainc_offset == 0 && msize == -ainc_step)
4301 return comp_cost (data->costs[AINC_POST_DEC], 0);
4302 if (ainc_offset == msize && msize == ainc_step)
4303 return comp_cost (data->costs[AINC_PRE_INC], 0);
4304 if (ainc_offset == -msize && msize == -ainc_step)
4305 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4307 return infinite_cost;
4310 /* Return cost of computing USE's address expression by using CAND.
4311 AFF_INV and AFF_VAR represent the invariant and variant parts of the
4312 address expression, respectively. If AFF_INV is simple, store
4313 the loop invariant variables it depends on in INV_VARS;
4314 if AFF_INV is complicated, handle it as a new invariant expression
4315 and record it in INV_EXPR. RATIO is the ratio between the
4316 steps of USE and CAND. If CAN_AUTOINC is non-NULL, store in it a
4317 boolean value indicating whether this is an auto-increment address. */
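/* For illustration only: for an access like a[i], with a a global array of
   4-byte elements and a candidate iv counting in steps of 1, the parts
   below might end up as symbol = &a, index = i, step = 4, with base and
   offset unused -- i.e. the "symbol + index << scale" form -- assuming
   the target accepts such a memory reference.  Any part the target cannot
   encode is folded back into the invariant expression and computed
   outside the address.  */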
4319 static comp_cost
4320 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4321 struct iv_cand *cand, aff_tree *aff_inv,
4322 aff_tree *aff_var, HOST_WIDE_INT ratio,
4323 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4324 bool *can_autoinc, bool speed)
4326 rtx addr;
4327 bool simple_inv = true;
4328 tree comp_inv = NULL_TREE, type = aff_var->type;
4329 comp_cost var_cost = no_cost, cost = no_cost;
4330 struct mem_address parts = {NULL_TREE, integer_one_node,
4331 NULL_TREE, NULL_TREE, NULL_TREE};
4332 machine_mode addr_mode = TYPE_MODE (type);
4333 machine_mode mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
4334 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4336 if (!aff_combination_const_p (aff_inv))
4338 parts.index = integer_one_node;
4339 /* Addressing mode "base + index". */
4340 if (valid_mem_ref_p (mem_mode, as, &parts))
4342 parts.step = wide_int_to_tree (type, ratio);
4343 /* Addressing mode "base + index << scale". */
4344 if (ratio != 1 && !valid_mem_ref_p (mem_mode, as, &parts))
4345 parts.step = NULL_TREE;
4347 if (aff_inv->offset != 0)
4349 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4350 /* Addressing mode "base + index [<< scale] + offset". */
4351 if (!valid_mem_ref_p (mem_mode, as, &parts))
4352 parts.offset = NULL_TREE;
4353 else
4354 aff_inv->offset = 0;
4357 move_fixed_address_to_symbol (&parts, aff_inv);
4358 /* Base is fixed address and is moved to symbol part. */
4359 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4360 parts.base = NULL_TREE;
4362 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4363 if (parts.symbol != NULL_TREE
4364 && !valid_mem_ref_p (mem_mode, as, &parts))
4366 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4367 parts.symbol = NULL_TREE;
4368 /* Reset SIMPLE_INV since the symbol address needs to be computed
4369 outside of the address expression in this case. */
4370 simple_inv = false;
4371 /* The symbol part is moved back to the base part; it can't be NULL. */
4372 parts.base = integer_one_node;
4375 else
4376 parts.index = NULL_TREE;
4378 else
4380 if (can_autoinc && ratio == 1 && cst_and_fits_in_hwi (cand->iv->step))
4382 HOST_WIDE_INT ainc_step = int_cst_value (cand->iv->step);
4383 HOST_WIDE_INT ainc_offset = (aff_inv->offset).to_shwi ();
4385 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4386 ainc_offset += ainc_step;
4387 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4388 addr_mode, mem_mode, as, speed);
4389 if (!cost.infinite_cost_p ())
4391 *can_autoinc = true;
4392 return cost;
4394 cost = no_cost;
4396 if (!aff_combination_zero_p (aff_inv))
4398 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4399 /* Addressing mode "base + offset". */
4400 if (!valid_mem_ref_p (mem_mode, as, &parts))
4401 parts.offset = NULL_TREE;
4402 else
4403 aff_inv->offset = 0;
4407 if (simple_inv)
4408 simple_inv = (aff_inv == NULL
4409 || aff_combination_const_p (aff_inv)
4410 || aff_combination_singleton_var_p (aff_inv));
4411 if (!aff_combination_zero_p (aff_inv))
4412 comp_inv = aff_combination_to_tree (aff_inv);
4413 if (comp_inv != NULL_TREE)
4414 cost = force_var_cost (data, comp_inv, inv_vars);
4415 if (ratio != 1 && parts.step == NULL_TREE)
4416 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4417 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4418 var_cost += add_cost (speed, addr_mode);
4420 if (comp_inv && inv_expr && !simple_inv)
4422 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4423 /* Clear INV_VARS; the dependence is now recorded in INV_EXPR instead. */
4424 if (*inv_expr != NULL && inv_vars && *inv_vars)
4425 bitmap_clear (*inv_vars);
4427 /* The cost of a small invariant expression adjusted against loop niters
4428 is usually zero, which makes it hard to differentiate it from a
4429 candidate based on loop invariant variables. Secondly, the
4430 generated invariant expression may not be hoisted out of the loop by
4431 a following pass. We penalize the cost by rounding up in order to
4432 neutralize such effects. */
4433 cost.cost = adjust_setup_cost (data, cost.cost, true);
4434 cost.scratch = cost.cost;
4437 cost += var_cost;
4438 addr = addr_for_mem_ref (&parts, as, false);
4439 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4440 cost += address_cost (addr, mem_mode, as, speed);
4442 if (parts.symbol != NULL_TREE)
4443 cost.complexity += 1;
4444 if (parts.step != NULL_TREE && !integer_onep (parts.step))
4445 cost.complexity += 1;
4446 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4447 cost.complexity += 1;
4448 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4449 cost.complexity += 1;
4451 return cost;
4454 /* Scale (multiply) the computed COST (except the scratch part, which should
4455 be hoisted out of the loop) by AT->frequency / header->frequency, which
4456 makes the expected cost more accurate. */
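/* For instance, with a header frequency of 1000 and a frequency of 250 for
   the block containing AT, a cost of 12 whose scratch (setup) part is 4 is
   scaled to 4 + (12 - 4) * 250 / 1000 = 6.  */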
4458 static comp_cost
4459 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4461 int loop_freq = data->current_loop->header->frequency;
4462 int bb_freq = gimple_bb (at)->frequency;
4463 if (loop_freq != 0)
4465 gcc_assert (cost.scratch <= cost.cost);
4466 int scaled_cost
4467 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4469 if (dump_file && (dump_flags & TDF_DETAILS))
4470 fprintf (dump_file, "Scaling cost based on bb prob "
4471 "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4472 1.0f * bb_freq / loop_freq, cost.cost,
4473 cost.scratch, scaled_cost, bb_freq, loop_freq);
4475 cost.cost = scaled_cost;
4478 return cost;
4481 /* Determines the cost of the computation by which USE is expressed
4482 from induction variable CAND. If ADDRESS_P is true, we just need
4483 to create an address from it, otherwise we want to get it into a
4484 register. A set of invariants we depend on is stored in INV_VARS.
4485 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4486 addressing is likely. If INV_EXPR is nonnull, record the invariant
4487 expression entry in it. */
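/* A sketch of the common case: for a use with iv {base_u, +, 4} and a
   candidate with iv {base_c, +, 2}, the ratio below is 2 and the use is
   rewritten roughly as (base_u - 2 * base_c) + 2 * cand, so the cost
   charged is that of forcing the invariant part into a register plus a
   multiplication by 2 and an addition, all scaled by the relative block
   frequency.  */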
4489 static comp_cost
4490 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4491 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4492 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4494 gimple *at = use->stmt;
4495 tree ubase = use->iv->base, cbase = cand->iv->base;
4496 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4497 tree comp_inv = NULL_TREE;
4498 HOST_WIDE_INT ratio, aratio;
4499 comp_cost cost;
4500 widest_int rat;
4501 aff_tree aff_inv, aff_var;
4502 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4504 if (inv_vars)
4505 *inv_vars = NULL;
4506 if (can_autoinc)
4507 *can_autoinc = false;
4508 if (inv_expr)
4509 *inv_expr = NULL;
4511 /* Check if we have enough precision to express the values of use. */
4512 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4513 return infinite_cost;
4515 if (address_p
4516 || (use->iv->base_object
4517 && cand->iv->base_object
4518 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4519 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4521 /* Do not try to express address of an object with computation based
4522 on address of a different object. This may cause problems in rtl
4523 level alias analysis (that does not expect this to be happening,
4524 as this is illegal in C), and would be unlikely to be useful
4525 anyway. */
4526 if (use->iv->base_object
4527 && cand->iv->base_object
4528 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4529 return infinite_cost;
4532 if (!get_computation_aff_1 (data->current_loop, at, use,
4533 cand, &aff_inv, &aff_var, &rat)
4534 || !wi::fits_shwi_p (rat))
4535 return infinite_cost;
4537 ratio = rat.to_shwi ();
4538 if (address_p)
4540 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4541 inv_vars, inv_expr, can_autoinc, speed);
4542 return get_scaled_computation_cost_at (data, at, cost);
4545 bool simple_inv = (aff_combination_const_p (&aff_inv)
4546 || aff_combination_singleton_var_p (&aff_inv));
4547 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4548 aff_combination_convert (&aff_inv, signed_type);
4549 if (!aff_combination_zero_p (&aff_inv))
4550 comp_inv = aff_combination_to_tree (&aff_inv);
4552 cost = force_var_cost (data, comp_inv, inv_vars);
4553 if (comp_inv && inv_expr && !simple_inv)
4555 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4556 /* Clear INV_VARS; the dependence is now recorded in INV_EXPR instead. */
4557 if (*inv_expr != NULL && inv_vars && *inv_vars)
4558 bitmap_clear (*inv_vars);
4560 cost.cost = adjust_setup_cost (data, cost.cost);
4561 /* Record setup cost in scratch field. */
4562 cost.scratch = cost.cost;
4564 /* The cost of a constant integer can be covered when adding the invariant
4565 part to the variant part. */
4566 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4567 cost = no_cost;
4569 /* Need type narrowing to represent use with cand. */
4570 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4572 machine_mode outer_mode = TYPE_MODE (utype);
4573 machine_mode inner_mode = TYPE_MODE (ctype);
4574 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4577 /* Turn a + i * (-c) into a - i * c. */
4578 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4579 aratio = -ratio;
4580 else
4581 aratio = ratio;
4583 if (ratio != 1)
4584 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4586 /* TODO: We may also need to check if we can compute a + i * 4 in one
4587 instruction. */
4588 /* Need to add up the invariant and variant parts. */
4589 if (comp_inv && !integer_zerop (comp_inv))
4590 cost += add_cost (speed, TYPE_MODE (utype));
4592 return get_scaled_computation_cost_at (data, at, cost);
4595 /* Determines cost of computing the use in GROUP with CAND in a generic
4596 expression. */
4598 static bool
4599 determine_group_iv_cost_generic (struct ivopts_data *data,
4600 struct iv_group *group, struct iv_cand *cand)
4602 comp_cost cost;
4603 iv_inv_expr_ent *inv_expr = NULL;
4604 bitmap inv_vars = NULL, inv_exprs = NULL;
4605 struct iv_use *use = group->vuses[0];
4607 /* The simple case first -- if we need to express the value of the preserved
4608 original biv, the cost is 0. This also prevents us from counting the
4609 cost of increment twice -- once at this use and once in the cost of
4610 the candidate. */
4611 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4612 cost = no_cost;
4613 else
4614 cost = get_computation_cost (data, use, cand, false,
4615 &inv_vars, NULL, &inv_expr);
4617 if (inv_expr)
4619 inv_exprs = BITMAP_ALLOC (NULL);
4620 bitmap_set_bit (inv_exprs, inv_expr->id);
4622 set_group_iv_cost (data, group, cand, cost, inv_vars,
4623 NULL_TREE, ERROR_MARK, inv_exprs);
4624 return !cost.infinite_cost_p ();
4627 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4629 static bool
4630 determine_group_iv_cost_address (struct ivopts_data *data,
4631 struct iv_group *group, struct iv_cand *cand)
4633 unsigned i;
4634 bitmap inv_vars = NULL, inv_exprs = NULL;
4635 bool can_autoinc;
4636 iv_inv_expr_ent *inv_expr = NULL;
4637 struct iv_use *use = group->vuses[0];
4638 comp_cost sum_cost = no_cost, cost;
4640 cost = get_computation_cost (data, use, cand, true,
4641 &inv_vars, &can_autoinc, &inv_expr);
4643 if (inv_expr)
4645 inv_exprs = BITMAP_ALLOC (NULL);
4646 bitmap_set_bit (inv_exprs, inv_expr->id);
4648 sum_cost = cost;
4649 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4651 if (can_autoinc)
4652 sum_cost -= cand->cost_step;
4653 /* If we generated the candidate solely for exploiting autoincrement
4654 opportunities, and it turns out it can't be used, set the cost to
4655 infinity to make sure we ignore it. */
4656 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4657 sum_cost = infinite_cost;
4660 /* Uses in a group can share setup code, so only add setup cost once. */
4661 cost -= cost.scratch;
4662 /* Compute and add costs for the remaining uses of this group. */
4663 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4665 struct iv_use *next = group->vuses[i];
4667 /* TODO: We could skip computing cost for sub iv_use when it has the
4668 same cost as the first iv_use, but the cost really depends on the
4669 offset and where the iv_use is. */
4670 cost = get_computation_cost (data, next, cand, true,
4671 NULL, &can_autoinc, &inv_expr);
4672 if (inv_expr)
4674 if (!inv_exprs)
4675 inv_exprs = BITMAP_ALLOC (NULL);
4677 bitmap_set_bit (inv_exprs, inv_expr->id);
4679 sum_cost += cost;
4681 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4682 NULL_TREE, ERROR_MARK, inv_exprs);
4684 return !sum_cost.infinite_cost_p ();
4687 /* Computes value of candidate CAND at position AT in iteration NITER, and
4688 stores it to VAL. */
4690 static void
4691 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4692 aff_tree *val)
4694 aff_tree step, delta, nit;
4695 struct iv *iv = cand->iv;
4696 tree type = TREE_TYPE (iv->base);
4697 tree steptype;
4698 if (POINTER_TYPE_P (type))
4699 steptype = sizetype;
4700 else
4701 steptype = unsigned_type_for (type);
4703 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4704 aff_combination_convert (&step, steptype);
4705 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4706 aff_combination_convert (&nit, steptype);
4707 aff_combination_mult (&nit, &step, &delta);
4708 if (stmt_after_increment (loop, cand, at))
4709 aff_combination_add (&delta, &step);
4711 tree_to_aff_combination (iv->base, type, val);
4712 if (!POINTER_TYPE_P (type))
4713 aff_combination_convert (val, steptype);
4714 aff_combination_add (val, &delta);
4717 /* Returns the period of induction variable IV. */
4719 static tree
4720 iv_period (struct iv *iv)
4722 tree step = iv->step, period, type;
4723 tree pow2div;
4725 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4727 type = unsigned_type_for (TREE_TYPE (step));
4728 /* Period of the iv is lcm (step, type_range)/step - 1,
4729 i.e., N*type_range/step - 1. Since the type range is a power
4730 of two, N == step >> num_of_ending_zeros_binary (step),
4731 so the final result is
4733 (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
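/* For example, a 32-bit unsigned iv with step 12 (two trailing zero bits)
   takes 1 << 30 distinct values before repeating, so the period computed
   below is (1 << 30) - 1.  */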
4736 pow2div = num_ending_zeros (step);
4738 period = build_low_bits_mask (type,
4739 (TYPE_PRECISION (type)
4740 - tree_to_uhwi (pow2div)));
4742 return period;
4745 /* Returns the comparison operator used when eliminating the iv USE. */
4747 static enum tree_code
4748 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4750 struct loop *loop = data->current_loop;
4751 basic_block ex_bb;
4752 edge exit;
4754 ex_bb = gimple_bb (use->stmt);
4755 exit = EDGE_SUCC (ex_bb, 0);
4756 if (flow_bb_inside_loop_p (loop, exit->dest))
4757 exit = EDGE_SUCC (ex_bb, 1);
4759 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4762 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4763 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4764 calculation is performed in a non-wrapping type.
4766 TODO: More generally, we could test for the situation that
4767 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4768 This would require knowing the sign of OFFSET. */
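/* For instance, if BASE is an SSA name defined by "base_3 = p_1 + len_2"
   in a non-wrapping type and OFFSET expands to the same value as len_2,
   then BASE - OFFSET is just p_1 and cannot overflow, so we return
   true.  */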
4770 static bool
4771 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4773 enum tree_code code;
4774 tree e1, e2;
4775 aff_tree aff_e1, aff_e2, aff_offset;
4777 if (!nowrap_type_p (TREE_TYPE (base)))
4778 return false;
4780 base = expand_simple_operations (base);
4782 if (TREE_CODE (base) == SSA_NAME)
4784 gimple *stmt = SSA_NAME_DEF_STMT (base);
4786 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4787 return false;
4789 code = gimple_assign_rhs_code (stmt);
4790 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4791 return false;
4793 e1 = gimple_assign_rhs1 (stmt);
4794 e2 = gimple_assign_rhs2 (stmt);
4796 else
4798 code = TREE_CODE (base);
4799 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4800 return false;
4801 e1 = TREE_OPERAND (base, 0);
4802 e2 = TREE_OPERAND (base, 1);
4805 /* Use affine expansion as a deeper inspection to prove the equality. */
4806 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4807 &aff_e2, &data->name_expansion_cache);
4808 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4809 &aff_offset, &data->name_expansion_cache);
4810 aff_combination_scale (&aff_offset, -1);
4811 switch (code)
4813 case PLUS_EXPR:
4814 aff_combination_add (&aff_e2, &aff_offset);
4815 if (aff_combination_zero_p (&aff_e2))
4816 return true;
4818 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4819 &aff_e1, &data->name_expansion_cache);
4820 aff_combination_add (&aff_e1, &aff_offset);
4821 return aff_combination_zero_p (&aff_e1);
4823 case POINTER_PLUS_EXPR:
4824 aff_combination_add (&aff_e2, &aff_offset);
4825 return aff_combination_zero_p (&aff_e2);
4827 default:
4828 return false;
4832 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4833 comparison with CAND. NITER describes the number of iterations of
4834 the loop. If successful, the comparison in COMP_P is altered accordingly.
4836 We aim to handle the following situation:
4838 sometype *base, *p;
4839 int a, b, i;
4841 i = a;
4842 p = p_0 = base + a;
4846 bla (*p);
4847 p++;
4848 i++;
4850 while (i < b);
4852 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4853 We aim to optimize this to
4855 p = p_0 = base + a;
4858 bla (*p);
4859 p++;
4861 while (p < p_0 - a + b);
4863 This preserves correctness, since the pointer arithmetic does not
4864 overflow. More precisely:
4866 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4867 overflow in computing it or the values of p.
4868 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4869 overflow. To prove this, we use the fact that p_0 = base + a. */
4871 static bool
4872 iv_elimination_compare_lt (struct ivopts_data *data,
4873 struct iv_cand *cand, enum tree_code *comp_p,
4874 struct tree_niter_desc *niter)
4876 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4877 struct aff_tree nit, tmpa, tmpb;
4878 enum tree_code comp;
4879 HOST_WIDE_INT step;
4881 /* We need to know that the candidate induction variable does not overflow.
4882 While more complex analysis may be used to prove this, for now just
4883 check that the variable appears in the original program and that it
4884 is computed in a type that guarantees no overflows. */
4885 cand_type = TREE_TYPE (cand->iv->base);
4886 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4887 return false;
4889 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4890 the calculation of the BOUND could overflow, making the comparison
4891 invalid. */
4892 if (!data->loop_single_exit_p)
4893 return false;
4895 /* We need to be able to decide whether candidate is increasing or decreasing
4896 in order to choose the right comparison operator. */
4897 if (!cst_and_fits_in_hwi (cand->iv->step))
4898 return false;
4899 step = int_cst_value (cand->iv->step);
4901 /* Check that the number of iterations matches the expected pattern:
4902 a + 1 > b ? 0 : b - a - 1. */
4903 mbz = niter->may_be_zero;
4904 if (TREE_CODE (mbz) == GT_EXPR)
4906 /* Handle a + 1 > b. */
4907 tree op0 = TREE_OPERAND (mbz, 0);
4908 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4910 a = TREE_OPERAND (op0, 0);
4911 b = TREE_OPERAND (mbz, 1);
4913 else
4914 return false;
4916 else if (TREE_CODE (mbz) == LT_EXPR)
4918 tree op1 = TREE_OPERAND (mbz, 1);
4920 /* Handle b < a + 1. */
4921 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4923 a = TREE_OPERAND (op1, 0);
4924 b = TREE_OPERAND (mbz, 0);
4926 else
4927 return false;
4929 else
4930 return false;
4932 /* Expected number of iterations is B - A - 1. Check that it matches
4933 the actual number, i.e., that B - A - NITER = 1. */
4934 tree_to_aff_combination (niter->niter, nit_type, &nit);
4935 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4936 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4937 aff_combination_scale (&nit, -1);
4938 aff_combination_scale (&tmpa, -1);
4939 aff_combination_add (&tmpb, &tmpa);
4940 aff_combination_add (&tmpb, &nit);
4941 if (tmpb.n != 0 || tmpb.offset != 1)
4942 return false;
4944 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4945 overflow. */
4946 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4947 cand->iv->step,
4948 fold_convert (TREE_TYPE (cand->iv->step), a));
4949 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4950 return false;
4952 /* Determine the new comparison operator. */
4953 comp = step < 0 ? GT_EXPR : LT_EXPR;
4954 if (*comp_p == NE_EXPR)
4955 *comp_p = comp;
4956 else if (*comp_p == EQ_EXPR)
4957 *comp_p = invert_tree_comparison (comp, false);
4958 else
4959 gcc_unreachable ();
4961 return true;
4964 /* Check whether it is possible to express the condition in USE by comparison
4965 of candidate CAND. If so, store the value compared with to BOUND, and the
4966 comparison operator to COMP. */
4968 static bool
4969 may_eliminate_iv (struct ivopts_data *data,
4970 struct iv_use *use, struct iv_cand *cand, tree *bound,
4971 enum tree_code *comp)
4973 basic_block ex_bb;
4974 edge exit;
4975 tree period;
4976 struct loop *loop = data->current_loop;
4977 aff_tree bnd;
4978 struct tree_niter_desc *desc = NULL;
4980 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4981 return false;
4983 /* For now this works only for exits that dominate the loop latch.
4984 TODO: extend to other conditions inside the loop body. */
4985 ex_bb = gimple_bb (use->stmt);
4986 if (use->stmt != last_stmt (ex_bb)
4987 || gimple_code (use->stmt) != GIMPLE_COND
4988 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4989 return false;
4991 exit = EDGE_SUCC (ex_bb, 0);
4992 if (flow_bb_inside_loop_p (loop, exit->dest))
4993 exit = EDGE_SUCC (ex_bb, 1);
4994 if (flow_bb_inside_loop_p (loop, exit->dest))
4995 return false;
4997 desc = niter_for_exit (data, exit);
4998 if (!desc)
4999 return false;
5001 /* Determine whether we can use the variable to test the exit condition.
5002 This is the case iff the period of the induction variable is greater
5003 than the number of iterations for which the exit condition is true. */
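/* For example, an 8-bit unsigned iv with step 1 has period 255; if the
   exit condition can be true for at most 200 iterations, the candidate
   still takes distinct values throughout, so its final value can be used
   in the exit test.  */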
5004 period = iv_period (cand->iv);
5006 /* If the number of iterations is constant, compare against it directly. */
5007 if (TREE_CODE (desc->niter) == INTEGER_CST)
5009 /* See cand_value_at. */
5010 if (stmt_after_increment (loop, cand, use->stmt))
5012 if (!tree_int_cst_lt (desc->niter, period))
5013 return false;
5015 else
5017 if (tree_int_cst_lt (period, desc->niter))
5018 return false;
5022 /* If not, and if this is the only possible exit of the loop, see whether
5023 we can get a conservative estimate on the number of iterations of the
5024 entire loop and compare against that instead. */
5025 else
5027 widest_int period_value, max_niter;
5029 max_niter = desc->max;
5030 if (stmt_after_increment (loop, cand, use->stmt))
5031 max_niter += 1;
5032 period_value = wi::to_widest (period);
5033 if (wi::gtu_p (max_niter, period_value))
5035 /* See if we can take advantage of inferred loop bound
5036 information. */
5037 if (data->loop_single_exit_p)
5039 if (!max_loop_iterations (loop, &max_niter))
5040 return false;
5041 /* The loop bound is already adjusted by adding 1. */
5042 if (wi::gtu_p (max_niter, period_value))
5043 return false;
5045 else
5046 return false;
5050 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5052 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5053 aff_combination_to_tree (&bnd));
5054 *comp = iv_elimination_compare (data, use);
5056 /* It is unlikely that computing the number of iterations using division
5057 would be more profitable than keeping the original induction variable. */
5058 if (expression_expensive_p (*bound))
5059 return false;
5061 /* Sometimes, it is possible to handle the situation that the number of
5062 iterations may be zero (unless additional assumptions hold) by using <
5063 instead of != in the exit condition.
5065 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5066 base the exit condition on it. However, that is often too
5067 expensive. */
5068 if (!integer_zerop (desc->may_be_zero))
5069 return iv_elimination_compare_lt (data, cand, comp, desc);
5071 return true;
5074 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5075 be copied if it is used in the loop body and DATA->body_includes_call. */
5077 static int
5078 parm_decl_cost (struct ivopts_data *data, tree bound)
5080 tree sbound = bound;
5081 STRIP_NOPS (sbound);
5083 if (TREE_CODE (sbound) == SSA_NAME
5084 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5085 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5086 && data->body_includes_call)
5087 return COSTS_N_INSNS (1);
5089 return 0;
5092 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5094 static bool
5095 determine_group_iv_cost_cond (struct ivopts_data *data,
5096 struct iv_group *group, struct iv_cand *cand)
5098 tree bound = NULL_TREE;
5099 struct iv *cmp_iv;
5100 bitmap inv_exprs = NULL;
5101 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5102 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5103 enum comp_iv_rewrite rewrite_type;
5104 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5105 tree *control_var, *bound_cst;
5106 enum tree_code comp = ERROR_MARK;
5107 struct iv_use *use = group->vuses[0];
5109 /* Extract condition operands. */
5110 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5111 &bound_cst, NULL, &cmp_iv);
5112 gcc_assert (rewrite_type != COMP_IV_NA);
5114 /* Try iv elimination. */
5115 if (rewrite_type == COMP_IV_ELIM
5116 && may_eliminate_iv (data, use, cand, &bound, &comp))
5118 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5119 if (elim_cost.cost == 0)
5120 elim_cost.cost = parm_decl_cost (data, bound);
5121 else if (TREE_CODE (bound) == INTEGER_CST)
5122 elim_cost.cost = 0;
5123 /* If we replace a loop condition 'i < n' with 'p < base + n',
5124 inv_vars_elim will have 'base' and 'n' set, which implies that both
5125 'base' and 'n' will be live during the loop. More likely,
5126 'base + n' will be loop invariant, resulting in only one live value
5127 during the loop. So in that case we clear inv_vars_elim and set
5128 inv_expr_elim instead. */
5129 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5131 inv_expr_elim = get_loop_invariant_expr (data, bound);
5132 bitmap_clear (inv_vars_elim);
5134 /* The bound is a loop invariant, so it will only be computed
5135 once. */
5136 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5139 /* When the condition is a comparison of the candidate IV against
5140 zero, prefer this IV.
5142 TODO: The constant that we're subtracting from the cost should
5143 be target-dependent. This information should be added to the
5144 target costs for each backend. */
5145 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5146 && integer_zerop (*bound_cst)
5147 && (operand_equal_p (*control_var, cand->var_after, 0)
5148 || operand_equal_p (*control_var, cand->var_before, 0)))
5149 elim_cost -= 1;
5151 express_cost = get_computation_cost (data, use, cand, false,
5152 &inv_vars_express, NULL,
5153 &inv_expr_express);
5154 if (cmp_iv != NULL)
5155 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5157 /* Count the cost of the original bound as well. */
5158 bound_cost = force_var_cost (data, *bound_cst, NULL);
5159 if (bound_cost.cost == 0)
5160 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5161 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5162 bound_cost.cost = 0;
5163 express_cost += bound_cost;
5165 /* Choose the better approach, preferring the eliminated IV. */
5166 if (elim_cost <= express_cost)
5168 cost = elim_cost;
5169 inv_vars = inv_vars_elim;
5170 inv_vars_elim = NULL;
5171 inv_expr = inv_expr_elim;
5173 else
5175 cost = express_cost;
5176 inv_vars = inv_vars_express;
5177 inv_vars_express = NULL;
5178 bound = NULL_TREE;
5179 comp = ERROR_MARK;
5180 inv_expr = inv_expr_express;
5183 if (inv_expr)
5185 inv_exprs = BITMAP_ALLOC (NULL);
5186 bitmap_set_bit (inv_exprs, inv_expr->id);
5188 set_group_iv_cost (data, group, cand, cost,
5189 inv_vars, bound, comp, inv_exprs);
5191 if (inv_vars_elim)
5192 BITMAP_FREE (inv_vars_elim);
5193 if (inv_vars_express)
5194 BITMAP_FREE (inv_vars_express);
5196 return !cost.infinite_cost_p ();
5199 /* Determines cost of computing uses in GROUP with CAND. Returns false
5200 if the group cannot be represented with CAND. */
5202 static bool
5203 determine_group_iv_cost (struct ivopts_data *data,
5204 struct iv_group *group, struct iv_cand *cand)
5206 switch (group->type)
5208 case USE_NONLINEAR_EXPR:
5209 return determine_group_iv_cost_generic (data, group, cand);
5211 case USE_ADDRESS:
5212 return determine_group_iv_cost_address (data, group, cand);
5214 case USE_COMPARE:
5215 return determine_group_iv_cost_cond (data, group, cand);
5217 default:
5218 gcc_unreachable ();
5222 /* Return true if get_computation_cost indicates that autoincrement is
5223 a possibility for the pair of USE and CAND, false otherwise. */
5225 static bool
5226 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5227 struct iv_cand *cand)
5229 if (use->type != USE_ADDRESS)
5230 return false;
5232 bool can_autoinc = false;
5233 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5234 return can_autoinc;
5237 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5238 use that allows autoincrement, and set their AINC_USE if possible. */
5240 static void
5241 set_autoinc_for_original_candidates (struct ivopts_data *data)
5243 unsigned i, j;
5245 for (i = 0; i < data->vcands.length (); i++)
5247 struct iv_cand *cand = data->vcands[i];
5248 struct iv_use *closest_before = NULL;
5249 struct iv_use *closest_after = NULL;
5250 if (cand->pos != IP_ORIGINAL)
5251 continue;
5253 for (j = 0; j < data->vgroups.length (); j++)
5255 struct iv_group *group = data->vgroups[j];
5256 struct iv_use *use = group->vuses[0];
5257 unsigned uid = gimple_uid (use->stmt);
5259 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5260 continue;
5262 if (uid < gimple_uid (cand->incremented_at)
5263 && (closest_before == NULL
5264 || uid > gimple_uid (closest_before->stmt)))
5265 closest_before = use;
5267 if (uid > gimple_uid (cand->incremented_at)
5268 && (closest_after == NULL
5269 || uid < gimple_uid (closest_after->stmt)))
5270 closest_after = use;
5273 if (closest_before != NULL
5274 && autoinc_possible_for_pair (data, closest_before, cand))
5275 cand->ainc_use = closest_before;
5276 else if (closest_after != NULL
5277 && autoinc_possible_for_pair (data, closest_after, cand))
5278 cand->ainc_use = closest_after;
5282 /* Relate compare use with all candidates. */
5284 static void
5285 relate_compare_use_with_all_cands (struct ivopts_data *data)
5287 unsigned i, max_id = data->vcands.length () - 1;
5288 for (i = 0; i < data->vgroups.length (); i++)
5290 struct iv_group *group = data->vgroups[i];
5292 if (group->type == USE_COMPARE)
5293 bitmap_set_range (group->related_cands, 0, max_id);
5297 /* Finds the candidates for the induction variables. */
5299 static void
5300 find_iv_candidates (struct ivopts_data *data)
5302 /* Add commonly used ivs. */
5303 add_standard_iv_candidates (data);
5305 /* Add old induction variables. */
5306 add_iv_candidate_for_bivs (data);
5308 /* Add induction variables derived from uses. */
5309 add_iv_candidate_for_groups (data);
5311 set_autoinc_for_original_candidates (data);
5313 /* Record the important candidates. */
5314 record_important_candidates (data);
5316 /* Relate compare iv_use with all candidates. */
5317 if (!data->consider_all_candidates)
5318 relate_compare_use_with_all_cands (data);
5320 if (dump_file && (dump_flags & TDF_DETAILS))
5322 unsigned i;
5324 fprintf (dump_file, "\n<Important Candidates>:\t");
5325 for (i = 0; i < data->vcands.length (); i++)
5326 if (data->vcands[i]->important)
5327 fprintf (dump_file, " %d,", data->vcands[i]->id);
5328 fprintf (dump_file, "\n");
5330 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5331 for (i = 0; i < data->vgroups.length (); i++)
5333 struct iv_group *group = data->vgroups[i];
5335 if (group->related_cands)
5337 fprintf (dump_file, " Group %d:\t", group->id);
5338 dump_bitmap (dump_file, group->related_cands);
5341 fprintf (dump_file, "\n");
5345 /* Determines the cost of computing each use group with each iv candidate. */
5347 static void
5348 determine_group_iv_costs (struct ivopts_data *data)
5350 unsigned i, j;
5351 struct iv_cand *cand;
5352 struct iv_group *group;
5353 bitmap to_clear = BITMAP_ALLOC (NULL);
5355 alloc_use_cost_map (data);
5357 for (i = 0; i < data->vgroups.length (); i++)
5359 group = data->vgroups[i];
5361 if (data->consider_all_candidates)
5363 for (j = 0; j < data->vcands.length (); j++)
5365 cand = data->vcands[j];
5366 determine_group_iv_cost (data, group, cand);
5369 else
5371 bitmap_iterator bi;
5373 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5375 cand = data->vcands[j];
5376 if (!determine_group_iv_cost (data, group, cand))
5377 bitmap_set_bit (to_clear, j);
5380 /* Remove the candidates for which the cost is infinite from
5381 the list of related candidates. */
5382 bitmap_and_compl_into (group->related_cands, to_clear);
5383 bitmap_clear (to_clear);
5387 BITMAP_FREE (to_clear);
5389 if (dump_file && (dump_flags & TDF_DETAILS))
5391 bitmap_iterator bi;
5393 /* Dump invariant variables. */
5394 fprintf (dump_file, "\n<Invariant Vars>:\n");
5395 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5397 struct version_info *info = ver_info (data, i);
5398 if (info->inv_id)
5400 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5401 print_generic_expr (dump_file, info->name, TDF_SLIM);
5402 fprintf (dump_file, "%s\n",
5403 info->has_nonlin_use ? "" : "\t(eliminable)");
5407 /* Dump invariant expressions. */
5408 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5409 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5411 for (hash_table<iv_inv_expr_hasher>::iterator it
5412 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5413 ++it)
5414 list.safe_push (*it);
5416 list.qsort (sort_iv_inv_expr_ent);
5418 for (i = 0; i < list.length (); ++i)
5420 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5421 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5422 fprintf (dump_file, "\n");
5425 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5427 for (i = 0; i < data->vgroups.length (); i++)
5429 group = data->vgroups[i];
5431 fprintf (dump_file, "Group %d:\n", i);
5432 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5433 for (j = 0; j < group->n_map_members; j++)
5435 if (!group->cost_map[j].cand
5436 || group->cost_map[j].cost.infinite_cost_p ())
5437 continue;
5439 fprintf (dump_file, " %d\t%d\t%d\t",
5440 group->cost_map[j].cand->id,
5441 group->cost_map[j].cost.cost,
5442 group->cost_map[j].cost.complexity);
5443 if (!group->cost_map[j].inv_exprs
5444 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5445 fprintf (dump_file, "NIL;\t");
5446 else
5447 bitmap_print (dump_file,
5448 group->cost_map[j].inv_exprs, "", ";\t");
5449 if (!group->cost_map[j].inv_vars
5450 || bitmap_empty_p (group->cost_map[j].inv_vars))
5451 fprintf (dump_file, "NIL;\n");
5452 else
5453 bitmap_print (dump_file,
5454 group->cost_map[j].inv_vars, "", "\n");
5457 fprintf (dump_file, "\n");
5459 fprintf (dump_file, "\n");
5463 /* Determines cost of the candidate CAND. */
5465 static void
5466 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5468 comp_cost cost_base;
5469 unsigned cost, cost_step;
5470 tree base;
5472 gcc_assert (cand->iv != NULL);
5474 /* There are two costs associated with the candidate -- its increment
5475 and its initialization. The second is almost negligible for any loop
5476 that rolls enough, so we give it only a small weight. */
5478 base = cand->iv->base;
5479 cost_base = force_var_cost (data, base, NULL);
5480 /* It is rare for the iv register to happen to be initialized with
5481 the proper value at no cost. In general, there will at least be a regcopy
5482 or a const set. */
5483 if (cost_base.cost == 0)
5484 cost_base.cost = COSTS_N_INSNS (1);
5485 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5487 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5489 /* Prefer the original iv unless we may gain something by replacing it.
5490 The reason is to make debugging simpler; so this is not relevant for
5491 artificial ivs created by other optimization passes. */
5492 if (cand->pos != IP_ORIGINAL
5493 || !SSA_NAME_VAR (cand->var_before)
5494 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5495 cost++;
5497 /* Prefer not to insert statements into the latch unless there are some
5498 already (so that we do not create unnecessary jumps). */
5499 if (cand->pos == IP_END
5500 && empty_block_p (ip_end_pos (data->current_loop)))
5501 cost++;
5503 cand->cost = cost;
5504 cand->cost_step = cost_step;
5507 /* Determines costs of computation of the candidates. */
5509 static void
5510 determine_iv_costs (struct ivopts_data *data)
5512 unsigned i;
5514 if (dump_file && (dump_flags & TDF_DETAILS))
5516 fprintf (dump_file, "<Candidate Costs>:\n");
5517 fprintf (dump_file, " cand\tcost\n");
5520 for (i = 0; i < data->vcands.length (); i++)
5522 struct iv_cand *cand = data->vcands[i];
5524 determine_iv_cost (data, cand);
5526 if (dump_file && (dump_flags & TDF_DETAILS))
5527 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5530 if (dump_file && (dump_flags & TDF_DETAILS))
5531 fprintf (dump_file, "\n");
5534 /* Calculates cost for having N_REGS registers. This number includes
5535 induction variables, invariant variables and invariant expressions. */
5537 static unsigned
5538 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned n_regs)
5540 unsigned cost = estimate_reg_pressure_cost (n_regs,
5541 data->regs_used, data->speed,
5542 data->body_includes_call);
5543 /* Add n_regs to the cost, so that we prefer eliminating ivs if possible. */
5544 return n_regs + cost;
5547 /* For each size of the induction variable set determine the penalty. */
5549 static void
5550 determine_set_costs (struct ivopts_data *data)
5552 unsigned j, n;
5553 gphi *phi;
5554 gphi_iterator psi;
5555 tree op;
5556 struct loop *loop = data->current_loop;
5557 bitmap_iterator bi;
5559 if (dump_file && (dump_flags & TDF_DETAILS))
5561 fprintf (dump_file, "<Global Costs>:\n");
5562 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5563 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5564 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5565 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5568 n = 0;
5569 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5571 phi = psi.phi ();
5572 op = PHI_RESULT (phi);
5574 if (virtual_operand_p (op))
5575 continue;
5577 if (get_iv (data, op))
5578 continue;
5580 if (!POINTER_TYPE_P (TREE_TYPE (op))
5581 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5582 continue;
5584 n++;
5587 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5589 struct version_info *info = ver_info (data, j);
5591 if (info->inv_id && info->has_nonlin_use)
5592 n++;
5595 data->regs_used = n;
5596 if (dump_file && (dump_flags & TDF_DETAILS))
5597 fprintf (dump_file, " regs_used %d\n", n);
5599 if (dump_file && (dump_flags & TDF_DETAILS))
5601 fprintf (dump_file, " cost for size:\n");
5602 fprintf (dump_file, " ivs\tcost\n");
5603 for (j = 0; j <= 2 * target_avail_regs; j++)
5604 fprintf (dump_file, " %d\t%d\n", j,
5605 ivopts_global_cost_for_size (data, j));
5606 fprintf (dump_file, "\n");
5610 /* Returns true if A is a cheaper cost pair than B. */
5612 static bool
5613 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5615 if (!a)
5616 return false;
5618 if (!b)
5619 return true;
5621 if (a->cost < b->cost)
5622 return true;
5624 if (b->cost < a->cost)
5625 return false;
5627 /* In case the costs are the same, prefer the cheaper candidate. */
5628 if (a->cand->cost < b->cand->cost)
5629 return true;
5631 return false;
5634 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
5635 for more expensive, equal and cheaper respectively. */
5637 static int
5638 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5640 if (cheaper_cost_pair (a, b))
5641 return -1;
5642 if (cheaper_cost_pair (b, a))
5643 return 1;
5645 return 0;
5648 /* Returns the cost pair by which GROUP is expressed in IVS. */
5650 static struct cost_pair *
5651 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5653 return ivs->cand_for_group[group->id];
5656 /* Computes the cost field of IVS structure. */
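/* That is, roughly:

     cost = sum of the per-group costs of the chosen cost pairs
            + the costs of the candidates themselves
            + the register pressure cost for (n_invs + n_cands) registers.  */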
5658 static void
5659 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5661 comp_cost cost = ivs->cand_use_cost;
5663 cost += ivs->cand_cost;
5664 cost += ivopts_global_cost_for_size (data, ivs->n_invs + ivs->n_cands);
5665 ivs->cost = cost;
5668 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
5669 and IVS. */
5671 static void
5672 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5674 bitmap_iterator bi;
5675 unsigned iid;
5677 if (!invs)
5678 return;
5680 gcc_assert (n_inv_uses != NULL);
5681 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5683 n_inv_uses[iid]--;
5684 if (n_inv_uses[iid] == 0)
5685 ivs->n_invs--;
5689 /* Set GROUP not to be expressed by any candidate in IVS. */
5691 static void
5692 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5693 struct iv_group *group)
5695 unsigned gid = group->id, cid;
5696 struct cost_pair *cp;
5698 cp = ivs->cand_for_group[gid];
5699 if (!cp)
5700 return;
5701 cid = cp->cand->id;
5703 ivs->bad_groups++;
5704 ivs->cand_for_group[gid] = NULL;
5705 ivs->n_cand_uses[cid]--;
5707 if (ivs->n_cand_uses[cid] == 0)
5709 bitmap_clear_bit (ivs->cands, cid);
5710 ivs->n_cands--;
5711 ivs->cand_cost -= cp->cand->cost;
5712 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5713 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5716 ivs->cand_use_cost -= cp->cost;
5717 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5718 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5719 iv_ca_recount_cost (data, ivs);
5722 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
5723 IVS. */
5725 static void
5726 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5728 bitmap_iterator bi;
5729 unsigned iid;
5731 if (!invs)
5732 return;
5734 gcc_assert (n_inv_uses != NULL);
5735 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5737 n_inv_uses[iid]++;
5738 if (n_inv_uses[iid] == 1)
5739 ivs->n_invs++;
5743 /* Set cost pair for GROUP in set IVS to CP. */
5745 static void
5746 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5747 struct iv_group *group, struct cost_pair *cp)
5749 unsigned gid = group->id, cid;
5751 if (ivs->cand_for_group[gid] == cp)
5752 return;
5754 if (ivs->cand_for_group[gid])
5755 iv_ca_set_no_cp (data, ivs, group);
5757 if (cp)
5759 cid = cp->cand->id;
5761 ivs->bad_groups--;
5762 ivs->cand_for_group[gid] = cp;
5763 ivs->n_cand_uses[cid]++;
5764 if (ivs->n_cand_uses[cid] == 1)
5766 bitmap_set_bit (ivs->cands, cid);
5767 ivs->n_cands++;
5768 ivs->cand_cost += cp->cand->cost;
5769 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5770 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5773 ivs->cand_use_cost += cp->cost;
5774 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5775 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5776 iv_ca_recount_cost (data, ivs);
5780 /* Extend set IVS by expressing GROUP by some of the candidates in it
5781 if possible. Consider all important candidates if candidates in
5782 set IVS don't give any result. */
5784 static void
5785 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5786 struct iv_group *group)
5788 struct cost_pair *best_cp = NULL, *cp;
5789 bitmap_iterator bi;
5790 unsigned i;
5791 struct iv_cand *cand;
5793 gcc_assert (ivs->upto >= group->id);
5794 ivs->upto++;
5795 ivs->bad_groups++;
5797 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5799 cand = data->vcands[i];
5800 cp = get_group_iv_cost (data, group, cand);
5801 if (cheaper_cost_pair (cp, best_cp))
5802 best_cp = cp;
5805 if (best_cp == NULL)
5807 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5809 cand = data->vcands[i];
5810 cp = get_group_iv_cost (data, group, cand);
5811 if (cheaper_cost_pair (cp, best_cp))
5812 best_cp = cp;
5816 iv_ca_set_cp (data, ivs, group, best_cp);
5819 /* Get cost for assignment IVS. */
5821 static comp_cost
5822 iv_ca_cost (struct iv_ca *ivs)
5824 /* This was a conditional expression but it triggered a bug in
5825 Sun C 5.5. */
5826 if (ivs->bad_groups)
5827 return infinite_cost;
5828 else
5829 return ivs->cost;
5832 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5833 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
5834 respectively. */
5836 static int
5837 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5838 struct iv_group *group, struct cost_pair *old_cp,
5839 struct cost_pair *new_cp)
5841 gcc_assert (old_cp && new_cp && old_cp != new_cp);
5842 unsigned old_n_invs = ivs->n_invs;
5843 iv_ca_set_cp (data, ivs, group, new_cp);
5844 unsigned new_n_invs = ivs->n_invs;
5845 iv_ca_set_cp (data, ivs, group, old_cp);
5847 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5850 /* Creates a change expressing GROUP by NEW_CP instead of OLD_CP and chains
5851 it before NEXT. */
5853 static struct iv_ca_delta *
5854 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5855 struct cost_pair *new_cp, struct iv_ca_delta *next)
5857 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5859 change->group = group;
5860 change->old_cp = old_cp;
5861 change->new_cp = new_cp;
5862 change->next = next;
5864 return change;
5867 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5868 are rewritten. */
5870 static struct iv_ca_delta *
5871 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5873 struct iv_ca_delta *last;
5875 if (!l2)
5876 return l1;
5878 if (!l1)
5879 return l2;
5881 for (last = l1; last->next; last = last->next)
5882 continue;
5883 last->next = l2;
5885 return l1;
5888 /* Reverse the list of changes DELTA, forming the inverse to it. */
5890 static struct iv_ca_delta *
5891 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5893 struct iv_ca_delta *act, *next, *prev = NULL;
5895 for (act = delta; act; act = next)
5897 next = act->next;
5898 act->next = prev;
5899 prev = act;
5901 std::swap (act->old_cp, act->new_cp);
5904 return prev;
5907 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5908 reverted instead. */
5910 static void
5911 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5912 struct iv_ca_delta *delta, bool forward)
5914 struct cost_pair *from, *to;
5915 struct iv_ca_delta *act;
5917 if (!forward)
5918 delta = iv_ca_delta_reverse (delta);
5920 for (act = delta; act; act = act->next)
5922 from = act->old_cp;
5923 to = act->new_cp;
5924 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
5925 iv_ca_set_cp (data, ivs, act->group, to);
5928 if (!forward)
5929 iv_ca_delta_reverse (delta);
5932 /* Returns true if CAND is used in IVS. */
5934 static bool
5935 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5937 return ivs->n_cand_uses[cand->id] > 0;
5940 /* Returns number of induction variable candidates in the set IVS. */
5942 static unsigned
5943 iv_ca_n_cands (struct iv_ca *ivs)
5945 return ivs->n_cands;
5948 /* Free the list of changes DELTA. */
5950 static void
5951 iv_ca_delta_free (struct iv_ca_delta **delta)
5953 struct iv_ca_delta *act, *next;
5955 for (act = *delta; act; act = next)
5957 next = act->next;
5958 free (act);
5961 *delta = NULL;
5964 /* Allocates a new iv candidate assignment. */
5966 static struct iv_ca *
5967 iv_ca_new (struct ivopts_data *data)
5969 struct iv_ca *nw = XNEW (struct iv_ca);
5971 nw->upto = 0;
5972 nw->bad_groups = 0;
5973 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
5974 data->vgroups.length ());
5975 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
5976 nw->cands = BITMAP_ALLOC (NULL);
5977 nw->n_cands = 0;
5978 nw->n_invs = 0;
5979 nw->cand_use_cost = no_cost;
5980 nw->cand_cost = 0;
5981 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
5982 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
5983 nw->cost = no_cost;
5985 return nw;
5988 /* Free memory occupied by the set IVS. */
5990 static void
5991 iv_ca_free (struct iv_ca **ivs)
5993 free ((*ivs)->cand_for_group);
5994 free ((*ivs)->n_cand_uses);
5995 BITMAP_FREE ((*ivs)->cands);
5996 free ((*ivs)->n_inv_var_uses);
5997 free ((*ivs)->n_inv_expr_uses);
5998 free (*ivs);
5999 *ivs = NULL;
6002 /* Dumps IVS to FILE. */
6004 static void
6005 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6007 unsigned i;
6008 comp_cost cost = iv_ca_cost (ivs);
6010 fprintf (file, " cost: %d (complexity %d)\n", cost.cost,
6011 cost.complexity);
6012 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
6013 ivs->cand_cost, ivs->cand_use_cost.cost,
6014 ivs->cand_use_cost.complexity);
6015 bitmap_print (file, ivs->cands, " candidates: ","\n");
6017 for (i = 0; i < ivs->upto; i++)
6019 struct iv_group *group = data->vgroups[i];
6020 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6021 if (cp)
6022 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6023 group->id, cp->cand->id, cp->cost.cost,
6024 cp->cost.complexity);
6025 else
6026 fprintf (file, " group:%d --> ??\n", group->id);
6029 const char *pref = "";
6030 fprintf (file, " invariant variables: ");
6031 for (i = 1; i <= data->max_inv_var_id; i++)
6032 if (ivs->n_inv_var_uses[i])
6034 fprintf (file, "%s%d", pref, i);
6035 pref = ", ";
6038 pref = "";
6039 fprintf (file, "\n invariant expressions: ");
6040 for (i = 1; i <= data->max_inv_expr_id; i++)
6041 if (ivs->n_inv_expr_uses[i])
6043 fprintf (file, "%s%d", pref, i);
6044 pref = ", ";
6047 fprintf (file, "\n\n");
6050 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6051 new set, and store differences in DELTA. Number of induction variables
6052 in the new set is stored in N_IVS. MIN_NCAND is a flag. When it is true,
6053 the function tries to find a solution with a minimal number of iv candidates. */
6055 static comp_cost
6056 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6057 struct iv_cand *cand, struct iv_ca_delta **delta,
6058 unsigned *n_ivs, bool min_ncand)
6060 unsigned i;
6061 comp_cost cost;
6062 struct iv_group *group;
6063 struct cost_pair *old_cp, *new_cp;
6065 *delta = NULL;
6066 for (i = 0; i < ivs->upto; i++)
6068 group = data->vgroups[i];
6069 old_cp = iv_ca_cand_for_group (ivs, group);
6071 if (old_cp
6072 && old_cp->cand == cand)
6073 continue;
6075 new_cp = get_group_iv_cost (data, group, cand);
6076 if (!new_cp)
6077 continue;
6079 if (!min_ncand)
6081 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6082 /* Skip if new_cp depends on more invariants. */
6083 if (cmp_invs > 0)
6084 continue;
6086 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6087 /* Skip if new_cp is not cheaper. */
6088 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6089 continue;
6092 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
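/* The delta is committed only temporarily here, to measure the cost of the
   extended set; it is rolled back right after, and the caller decides
   whether to commit it for real.  */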
6095 iv_ca_delta_commit (data, ivs, *delta, true);
6096 cost = iv_ca_cost (ivs);
6097 if (n_ivs)
6098 *n_ivs = iv_ca_n_cands (ivs);
6099 iv_ca_delta_commit (data, ivs, *delta, false);
6101 return cost;
6104 /* Try narrowing set IVS by removing CAND. Return the cost of
6105 the new set and store the differences in DELTA. START is
6106 the candidate with which we start narrowing. */
6108 static comp_cost
6109 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6110 struct iv_cand *cand, struct iv_cand *start,
6111 struct iv_ca_delta **delta)
6113 unsigned i, ci;
6114 struct iv_group *group;
6115 struct cost_pair *old_cp, *new_cp, *cp;
6116 bitmap_iterator bi;
6117 struct iv_cand *cnd;
6118 comp_cost cost, best_cost, acost;
6120 *delta = NULL;
6121 for (i = 0; i < data->vgroups.length (); i++)
6123 group = data->vgroups[i];
6125 old_cp = iv_ca_cand_for_group (ivs, group);
6126 if (old_cp->cand != cand)
6127 continue;
6129 best_cost = iv_ca_cost (ivs);
6130 /* Start narrowing with START. */
6131 new_cp = get_group_iv_cost (data, group, start);
6133 if (data->consider_all_candidates)
6135 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6137 if (ci == cand->id || (start && ci == start->id))
6138 continue;
6140 cnd = data->vcands[ci];
6142 cp = get_group_iv_cost (data, group, cnd);
6143 if (!cp)
6144 continue;
6146 iv_ca_set_cp (data, ivs, group, cp);
6147 acost = iv_ca_cost (ivs);
6149 if (acost < best_cost)
6151 best_cost = acost;
6152 new_cp = cp;
6156 else
6158 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6160 if (ci == cand->id || (start && ci == start->id))
6161 continue;
6163 cnd = data->vcands[ci];
6165 cp = get_group_iv_cost (data, group, cnd);
6166 if (!cp)
6167 continue;
6169 iv_ca_set_cp (data, ivs, group, cp);
6170 acost = iv_ca_cost (ivs);
6172 if (acost < best_cost)
6174 best_cost = acost;
6175 new_cp = cp;
6179 /* Restore the old cp for the group. */
6180 iv_ca_set_cp (data, ivs, group, old_cp);
6182 if (!new_cp)
6184 iv_ca_delta_free (delta);
6185 return infinite_cost;
6188 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6191 iv_ca_delta_commit (data, ivs, *delta, true);
6192 cost = iv_ca_cost (ivs);
6193 iv_ca_delta_commit (data, ivs, *delta, false);
6195 return cost;
6198 /* Try optimizing the set of candidates IVS by removing candidates other
6199 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6200 differences in DELTA. */
6202 static comp_cost
6203 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6204 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6206 bitmap_iterator bi;
6207 struct iv_ca_delta *act_delta, *best_delta;
6208 unsigned i;
6209 comp_cost best_cost, acost;
6210 struct iv_cand *cand;
6212 best_delta = NULL;
6213 best_cost = iv_ca_cost (ivs);
6215 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6217 cand = data->vcands[i];
6219 if (cand == except_cand)
6220 continue;
6222 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6224 if (acost < best_cost)
6226 best_cost = acost;
6227 iv_ca_delta_free (&best_delta);
6228 best_delta = act_delta;
6230 else
6231 iv_ca_delta_free (&act_delta);
6234 if (!best_delta)
6236 *delta = NULL;
6237 return best_cost;
6240 /* Recurse to possibly remove other unnecessary ivs. */
6241 iv_ca_delta_commit (data, ivs, best_delta, true);
6242 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6243 iv_ca_delta_commit (data, ivs, best_delta, false);
6244 *delta = iv_ca_delta_join (best_delta, *delta);
6245 return best_cost;
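/* Illustration: if removing candidate C1 makes C2 removable as well, the
   recursion above returns DELTA as the changes for C1 joined with the
   changes for C2, so a single iv_ca_delta_commit applies both removals.  */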
6248 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6249 cheaper local cost for GROUP than BEST_CP. If so, return a pointer to
6250 its cost_pair; otherwise just return BEST_CP. */
6252 static struct cost_pair*
6253 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6254 unsigned int cand_idx, struct iv_cand *old_cand,
6255 struct cost_pair *best_cp)
6257 struct iv_cand *cand;
6258 struct cost_pair *cp;
6260 gcc_assert (old_cand != NULL && best_cp != NULL);
6261 if (cand_idx == old_cand->id)
6262 return best_cp;
6264 cand = data->vcands[cand_idx];
6265 cp = get_group_iv_cost (data, group, cand);
6266 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6267 return cp;
6269 return best_cp;
6272 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6273 which are used by more than one iv use. For each of those candidates,
6274 this function tries to represent iv uses under that candidate using
6275 other ones with lower local cost, then tries to prune the new set.
6276 If the new set has lower cost, it returns the new cost after recording
6277 candidate replacement in list DELTA. */
6279 static comp_cost
6280 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6281 struct iv_ca_delta **delta)
6283 bitmap_iterator bi, bj;
6284 unsigned int i, j, k;
6285 struct iv_cand *cand;
6286 comp_cost orig_cost, acost;
6287 struct iv_ca_delta *act_delta, *tmp_delta;
6288 struct cost_pair *old_cp, *best_cp = NULL;
6290 *delta = NULL;
6291 orig_cost = iv_ca_cost (ivs);
6293 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6295 if (ivs->n_cand_uses[i] == 1
6296 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6297 continue;
6299 cand = data->vcands[i];
6301 act_delta = NULL;
6302 /* Represent uses under current candidate using other ones with
6303 lower local cost. */
6304 for (j = 0; j < ivs->upto; j++)
6306 struct iv_group *group = data->vgroups[j];
6307 old_cp = iv_ca_cand_for_group (ivs, group);
6309 if (old_cp->cand != cand)
6310 continue;
6312 best_cp = old_cp;
6313 if (data->consider_all_candidates)
6314 for (k = 0; k < data->vcands.length (); k++)
6315 best_cp = cheaper_cost_with_cand (data, group, k,
6316 old_cp->cand, best_cp);
6317 else
6318 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6319 best_cp = cheaper_cost_with_cand (data, group, k,
6320 old_cp->cand, best_cp);
6322 if (best_cp == old_cp)
6323 continue;
6325 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6327 /* No need for further pruning. */
6328 if (!act_delta)
6329 continue;
6331 /* Prune the new candidate set. */
6332 iv_ca_delta_commit (data, ivs, act_delta, true);
6333 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6334 iv_ca_delta_commit (data, ivs, act_delta, false);
6335 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6337 if (acost < orig_cost)
6339 *delta = act_delta;
6340 return acost;
6342 else
6343 iv_ca_delta_free (&act_delta);
6346 return orig_cost;
6349 /* Tries to extend the set IVS in the best possible way in order to
6350 express the GROUP. If ORIGINALP is true, prefer candidates from
6351 the original set of IVs, otherwise favor important candidates not
6352 based on any memory object. */
6354 static bool
6355 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6356 struct iv_group *group, bool originalp)
6358 comp_cost best_cost, act_cost;
6359 unsigned i;
6360 bitmap_iterator bi;
6361 struct iv_cand *cand;
6362 struct iv_ca_delta *best_delta = NULL, *act_delta;
6363 struct cost_pair *cp;
6365 iv_ca_add_group (data, ivs, group);
6366 best_cost = iv_ca_cost (ivs);
6367 cp = iv_ca_cand_for_group (ivs, group);
6368 if (cp)
6370 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6371 iv_ca_set_no_cp (data, ivs, group);
6374 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6375 first try important candidates not based on any memory object. Only if
6376 this fails, try the specific ones. Rationale -- in loops with many
6377 variables the best choice often is to use just one generic biv. If we
6378 added here many ivs specific to the uses, the optimization algorithm later
6379 would be likely to get stuck in a local minimum, thus causing us to create
6380 too many ivs. The approach from few ivs to more seems more likely to be
6381 successful -- starting from few ivs, replacing an expensive use by a
6382 specific iv should always be a win. */
6383 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6385 cand = data->vcands[i];
6387 if (originalp && cand->pos != IP_ORIGINAL)
6388 continue;
6390 if (!originalp && cand->iv->base_object != NULL_TREE)
6391 continue;
6393 if (iv_ca_cand_used_p (ivs, cand))
6394 continue;
6396 cp = get_group_iv_cost (data, group, cand);
6397 if (!cp)
6398 continue;
6400 iv_ca_set_cp (data, ivs, group, cp);
6401 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6402 true);
6403 iv_ca_set_no_cp (data, ivs, group);
6404 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6406 if (act_cost < best_cost)
6408 best_cost = act_cost;
6410 iv_ca_delta_free (&best_delta);
6411 best_delta = act_delta;
6413 else
6414 iv_ca_delta_free (&act_delta);
6417 if (best_cost.infinite_cost_p ())
6419 for (i = 0; i < group->n_map_members; i++)
6421 cp = group->cost_map + i;
6422 cand = cp->cand;
6423 if (!cand)
6424 continue;
6426 /* Already tried this. */
6427 if (cand->important)
6429 if (originalp && cand->pos == IP_ORIGINAL)
6430 continue;
6431 if (!originalp && cand->iv->base_object == NULL_TREE)
6432 continue;
6435 if (iv_ca_cand_used_p (ivs, cand))
6436 continue;
6438 act_delta = NULL;
6439 iv_ca_set_cp (data, ivs, group, cp);
6440 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6441 iv_ca_set_no_cp (data, ivs, group);
6442 act_delta = iv_ca_delta_add (group,
6443 iv_ca_cand_for_group (ivs, group),
6444 cp, act_delta);
6446 if (act_cost < best_cost)
6448 best_cost = act_cost;
6450 if (best_delta)
6451 iv_ca_delta_free (&best_delta);
6452 best_delta = act_delta;
6454 else
6455 iv_ca_delta_free (&act_delta);
6459 iv_ca_delta_commit (data, ivs, best_delta, true);
6460 iv_ca_delta_free (&best_delta);
6462 return !best_cost.infinite_cost_p ();
6465 /* Finds an initial assignment of candidates to uses. */
6467 static struct iv_ca *
6468 get_initial_solution (struct ivopts_data *data, bool originalp)
6470 unsigned i;
6471 struct iv_ca *ivs = iv_ca_new (data);
6473 for (i = 0; i < data->vgroups.length (); i++)
6474 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6476 iv_ca_free (&ivs);
6477 return NULL;
6480 return ivs;
6483 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6484 points to a bool variable; if it is true, this function tries to break
6485 the local optimal fixed-point by replacing candidates in IVS. */
6487 static bool
6488 try_improve_iv_set (struct ivopts_data *data,
6489 struct iv_ca *ivs, bool *try_replace_p)
6491 unsigned i, n_ivs;
6492 comp_cost acost, best_cost = iv_ca_cost (ivs);
6493 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6494 struct iv_cand *cand;
6496 /* Try extending the set of induction variables by one. */
6497 for (i = 0; i < data->vcands.length (); i++)
6499 cand = data->vcands[i];
6501 if (iv_ca_cand_used_p (ivs, cand))
6502 continue;
6504 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6505 if (!act_delta)
6506 continue;
6508 /* If we successfully added the candidate and the set is small enough,
6509 try optimizing it by removing other candidates. */
6510 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6512 iv_ca_delta_commit (data, ivs, act_delta, true);
6513 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6514 iv_ca_delta_commit (data, ivs, act_delta, false);
6515 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6518 if (acost < best_cost)
6520 best_cost = acost;
6521 iv_ca_delta_free (&best_delta);
6522 best_delta = act_delta;
6524 else
6525 iv_ca_delta_free (&act_delta);
6528 if (!best_delta)
6530 /* Try removing the candidates from the set instead. */
6531 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6533 if (!best_delta && *try_replace_p)
6535 *try_replace_p = false;
6536 /* So far the candidate selection algorithm tends to choose fewer IVs,
6537 so that it can handle cases in which loops have many variables
6538 but the best choice is often to use only one general biv. One
6539 weakness is that it cannot handle the opposite cases, in which different
6540 candidates should be chosen with respect to each use. To solve
6541 the problem, we replace candidates in the manner described by the
6542 comments of iv_ca_replace, thus giving the general algorithm a chance
6543 to break the local optimal fixed-point in these cases. */
6544 best_cost = iv_ca_replace (data, ivs, &best_delta);
6547 if (!best_delta)
6548 return false;
6551 iv_ca_delta_commit (data, ivs, best_delta, true);
6552 gcc_assert (best_cost == iv_ca_cost (ivs));
6553 iv_ca_delta_free (&best_delta);
6554 return true;
6557 /* Attempts to find the optimal set of induction variables. We use a simple
6558 greedy heuristic -- we try to replace at most one candidate in the selected
6559 solution and remove the unused ivs while this improves the cost. */
6561 static struct iv_ca *
6562 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6564 struct iv_ca *set;
6565 bool try_replace_p = true;
6567 /* Get the initial solution. */
6568 set = get_initial_solution (data, originalp);
6569 if (!set)
6571 if (dump_file && (dump_flags & TDF_DETAILS))
6572 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6573 return NULL;
6576 if (dump_file && (dump_flags & TDF_DETAILS))
6578 fprintf (dump_file, "Initial set of candidates:\n");
6579 iv_ca_dump (data, dump_file, set);
6582 while (try_improve_iv_set (data, set, &try_replace_p))
6584 if (dump_file && (dump_flags & TDF_DETAILS))
6586 fprintf (dump_file, "Improved to:\n");
6587 iv_ca_dump (data, dump_file, set);
6591 return set;
6594 static struct iv_ca *
6595 find_optimal_iv_set (struct ivopts_data *data)
6597 unsigned i;
6598 comp_cost cost, origcost;
6599 struct iv_ca *set, *origset;
6601 /* Determine the cost based on a strategy that starts with the original IVs,
6602 then try again using a strategy that prefers candidates not based
6603 on any IVs. */
6604 origset = find_optimal_iv_set_1 (data, true);
6605 set = find_optimal_iv_set_1 (data, false);
6607 if (!origset && !set)
6608 return NULL;
6610 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6611 cost = set ? iv_ca_cost (set) : infinite_cost;
6613 if (dump_file && (dump_flags & TDF_DETAILS))
6615 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6616 origcost.cost, origcost.complexity);
6617 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6618 cost.cost, cost.complexity);
6621 /* Choose the one with the best cost. */
6622 if (origcost <= cost)
6624 if (set)
6625 iv_ca_free (&set);
6626 set = origset;
6628 else if (origset)
6629 iv_ca_free (&origset);
6631 for (i = 0; i < data->vgroups.length (); i++)
6633 struct iv_group *group = data->vgroups[i];
6634 group->selected = iv_ca_cand_for_group (set, group)->cand;
6637 return set;
6640 /* Creates a new induction variable corresponding to CAND. */
6642 static void
6643 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6645 gimple_stmt_iterator incr_pos;
6646 tree base;
6647 struct iv_use *use;
6648 struct iv_group *group;
6649 bool after = false;
6651 gcc_assert (cand->iv != NULL);
6653 switch (cand->pos)
6655 case IP_NORMAL:
6656 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6657 break;
6659 case IP_END:
6660 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6661 after = true;
6662 break;
6664 case IP_AFTER_USE:
6665 after = true;
6666 /* fall through */
6667 case IP_BEFORE_USE:
6668 incr_pos = gsi_for_stmt (cand->incremented_at);
6669 break;
6671 case IP_ORIGINAL:
6672 /* Mark that the iv is preserved. */
6673 name_info (data, cand->var_before)->preserve_biv = true;
6674 name_info (data, cand->var_after)->preserve_biv = true;
6676 /* Rewrite the increment so that it uses var_before directly. */
6677 use = find_interesting_uses_op (data, cand->var_after);
6678 group = data->vgroups[use->group_id];
6679 group->selected = cand;
6680 return;
6683 gimple_add_tmp_var (cand->var_before);
6685 base = unshare_expr (cand->iv->base);
6687 create_iv (base, unshare_expr (cand->iv->step),
6688 cand->var_before, data->current_loop,
6689 &incr_pos, after, &cand->var_before, &cand->var_after);
6692 /* Creates new induction variables described in SET. */
6694 static void
6695 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6697 unsigned i;
6698 struct iv_cand *cand;
6699 bitmap_iterator bi;
6701 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6703 cand = data->vcands[i];
6704 create_new_iv (data, cand);
6707 if (dump_file && (dump_flags & TDF_DETAILS))
6709 fprintf (dump_file, "Selected IV set for loop %d",
6710 data->current_loop->num);
6711 if (data->loop_loc != UNKNOWN_LOCATION)
6712 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6713 LOCATION_LINE (data->loop_loc));
6714 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6715 avg_loop_niter (data->current_loop));
6716 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6717 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6719 cand = data->vcands[i];
6720 dump_cand (dump_file, cand);
6722 fprintf (dump_file, "\n");
6726 /* Rewrites USE (definition of iv used in a nonlinear expression)
6727 using candidate CAND. */
6729 static void
6730 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6731 struct iv_use *use, struct iv_cand *cand)
6733 gassign *ass;
6734 gimple_stmt_iterator bsi;
6735 tree comp, type = get_use_type (use), tgt;
6737 /* An important special case -- if we are asked to express value of
6738 the original iv by itself, just exit; there is no need to
6739 introduce a new computation (that might also need casting the
6740 variable to unsigned and back). */
6741 if (cand->pos == IP_ORIGINAL
6742 && cand->incremented_at == use->stmt)
6744 tree op = NULL_TREE;
6745 enum tree_code stmt_code;
6747 gcc_assert (is_gimple_assign (use->stmt));
6748 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6750 /* Check whether we may leave the computation unchanged.
6751 This is the case only if it does not rely on other
6752 computations in the loop -- otherwise, the computation
6753 we rely upon may be removed in remove_unused_ivs,
6754 thus leading to ICE. */
6755 stmt_code = gimple_assign_rhs_code (use->stmt);
6756 if (stmt_code == PLUS_EXPR
6757 || stmt_code == MINUS_EXPR
6758 || stmt_code == POINTER_PLUS_EXPR)
6760 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6761 op = gimple_assign_rhs2 (use->stmt);
6762 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6763 op = gimple_assign_rhs1 (use->stmt);
6766 if (op != NULL_TREE)
6768 if (expr_invariant_in_loop_p (data->current_loop, op))
6769 return;
6770 if (TREE_CODE (op) == SSA_NAME)
6772 struct iv *iv = get_iv (data, op);
6773 if (iv != NULL && integer_zerop (iv->step))
6774 return;
6779 switch (gimple_code (use->stmt))
6781 case GIMPLE_PHI:
6782 tgt = PHI_RESULT (use->stmt);
6784 /* If we should keep the biv, do not replace it. */
6785 if (name_info (data, tgt)->preserve_biv)
6786 return;
6788 bsi = gsi_after_labels (gimple_bb (use->stmt));
6789 break;
6791 case GIMPLE_ASSIGN:
6792 tgt = gimple_assign_lhs (use->stmt);
6793 bsi = gsi_for_stmt (use->stmt);
6794 break;
6796 default:
6797 gcc_unreachable ();
6800 aff_tree aff_inv, aff_var;
6801 if (!get_computation_aff_1 (data->current_loop, use->stmt,
6802 use, cand, &aff_inv, &aff_var))
6803 gcc_unreachable ();
6805 unshare_aff_combination (&aff_inv);
6806 unshare_aff_combination (&aff_var);
6807 /* Prefer a CSE opportunity over a loop invariant by adding the offset
6808 last, so that iv_uses with different offsets can be CSEed. */
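/* For instance (illustration only): with uses at base + 4*i + 8 and
   base + 4*i + 12, splitting out the constant offsets and adding them last
   leaves the shared subexpression base + 4*i available for CSE.  */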
6809 widest_int offset = aff_inv.offset;
6810 aff_inv.offset = 0;
6812 gimple_seq stmt_list = NULL, seq = NULL;
6813 tree comp_op1 = aff_combination_to_tree (&aff_inv);
6814 tree comp_op2 = aff_combination_to_tree (&aff_var);
6815 gcc_assert (comp_op1 && comp_op2);
6817 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6818 gimple_seq_add_seq (&stmt_list, seq);
6819 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6820 gimple_seq_add_seq (&stmt_list, seq);
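/* Canonicalize so that the pointer operand, if any, ends up in comp_op1;
   pointers are then combined with POINTER_PLUS, everything else with a
   plain PLUS in comp_op1's type.  */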
6822 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6823 std::swap (comp_op1, comp_op2);
6825 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6827 comp = fold_build_pointer_plus (comp_op1,
6828 fold_convert (sizetype, comp_op2));
6829 comp = fold_build_pointer_plus (comp,
6830 wide_int_to_tree (sizetype, offset));
6832 else
6834 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6835 fold_convert (TREE_TYPE (comp_op1), comp_op2));
6836 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6837 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6840 comp = fold_convert (type, comp);
6841 if (!valid_gimple_rhs_p (comp)
6842 || (gimple_code (use->stmt) != GIMPLE_PHI
6843 /* We can't allow re-allocating the stmt as it might be pointed
6844 to still. */
6845 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6846 >= gimple_num_ops (gsi_stmt (bsi)))))
6848 comp = force_gimple_operand (comp, &seq, true, NULL);
6849 gimple_seq_add_seq (&stmt_list, seq);
6850 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6852 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6853 /* As this isn't a plain copy we have to reset alignment
6854 information. */
6855 if (SSA_NAME_PTR_INFO (comp))
6856 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6860 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
6861 if (gimple_code (use->stmt) == GIMPLE_PHI)
6863 ass = gimple_build_assign (tgt, comp);
6864 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6866 bsi = gsi_for_stmt (use->stmt);
6867 remove_phi_node (&bsi, false);
6869 else
6871 gimple_assign_set_rhs_from_tree (&bsi, comp);
6872 use->stmt = gsi_stmt (bsi);
6876 /* Performs a peephole optimization to reorder the iv update statement with
6877 a mem ref to enable instruction combining in later phases. The mem ref uses
6878 the iv value before the update, so the reordering transformation requires
6879 adjustment of the offset. CAND is the selected IV_CAND.
6881 Example:
6883 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6884 iv2 = iv1 + 1;
6886 if (t < val) (1)
6887 goto L;
6888 goto Head;
6891 directly propagating t over to (1) would introduce an overlapping live range
6892 and thus increase register pressure. This peephole transforms it into:
6895 iv2 = iv1 + 1;
6896 t = MEM_REF (base, iv2, 8, 8);
6897 if (t < val)
6898 goto L;
6899 goto Head;
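   (The constant offset drops from 16 to 8 because iv2 already includes one
   increment of the stride-8 index, so base + iv2*8 + 8 == base + iv1*8 + 16.)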
6902 static void
6903 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6905 tree var_after;
6906 gimple *iv_update, *stmt;
6907 basic_block bb;
6908 gimple_stmt_iterator gsi, gsi_iv;
6910 if (cand->pos != IP_NORMAL)
6911 return;
6913 var_after = cand->var_after;
6914 iv_update = SSA_NAME_DEF_STMT (var_after);
6916 bb = gimple_bb (iv_update);
6917 gsi = gsi_last_nondebug_bb (bb);
6918 stmt = gsi_stmt (gsi);
6920 /* Only handle conditional statement for now. */
6921 if (gimple_code (stmt) != GIMPLE_COND)
6922 return;
6924 gsi_prev_nondebug (&gsi);
6925 stmt = gsi_stmt (gsi);
6926 if (stmt != iv_update)
6927 return;
6929 gsi_prev_nondebug (&gsi);
6930 if (gsi_end_p (gsi))
6931 return;
6933 stmt = gsi_stmt (gsi);
6934 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6935 return;
6937 if (stmt != use->stmt)
6938 return;
6940 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6941 return;
6943 if (dump_file && (dump_flags & TDF_DETAILS))
6945 fprintf (dump_file, "Reordering \n");
6946 print_gimple_stmt (dump_file, iv_update, 0);
6947 print_gimple_stmt (dump_file, use->stmt, 0);
6948 fprintf (dump_file, "\n");
6951 gsi = gsi_for_stmt (use->stmt);
6952 gsi_iv = gsi_for_stmt (iv_update);
6953 gsi_move_before (&gsi_iv, &gsi);
6955 cand->pos = IP_BEFORE_USE;
6956 cand->incremented_at = use->stmt;
6959 /* Rewrites USE (address that is an iv) using candidate CAND. */
6961 static void
6962 rewrite_use_address (struct ivopts_data *data,
6963 struct iv_use *use, struct iv_cand *cand)
6965 aff_tree aff;
6966 bool ok;
6968 adjust_iv_update_pos (cand, use);
6969 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
6970 gcc_assert (ok);
6971 unshare_aff_combination (&aff);
6973 /* To avoid undefined overflow problems, all IV candidates use unsigned
6974 integer types. The drawback is that this makes it impossible for
6975 create_mem_ref to distinguish an IV that is based on a memory object
6976 from one that represents simply an offset.
6978 To work around this problem, we pass a hint to create_mem_ref that
6979 indicates which variable (if any) in aff is an IV based on a memory
6980 object. Note that we only consider the candidate. If this is not
6981 based on an object, the base of the reference is in some subexpression
6982 of the use -- but these will use pointer types, so they are recognized
6983 by the create_mem_ref heuristics anyway. */
6984 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
6985 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
6986 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6987 tree type = TREE_TYPE (*use->op_p);
6988 unsigned int align = get_object_alignment (*use->op_p);
6989 if (align != TYPE_ALIGN (type))
6990 type = build_aligned_type (type, align);
6992 tree ref = create_mem_ref (&bsi, type, &aff,
6993 reference_alias_ptr_type (*use->op_p),
6994 iv, base_hint, data->speed);
6996 copy_ref_info (ref, *use->op_p);
6997 *use->op_p = ref;
7000 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7001 candidate CAND. */
7003 static void
7004 rewrite_use_compare (struct ivopts_data *data,
7005 struct iv_use *use, struct iv_cand *cand)
7007 tree comp, op, bound;
7008 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7009 enum tree_code compare;
7010 struct iv_group *group = data->vgroups[use->group_id];
7011 struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7013 bound = cp->value;
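/* A nonnull BOUND means induction variable elimination succeeded for this
   use: the exit test can be rewritten as VAR <compare> BOUND.  */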
7014 if (bound)
7016 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7017 tree var_type = TREE_TYPE (var);
7018 gimple_seq stmts;
7020 if (dump_file && (dump_flags & TDF_DETAILS))
7022 fprintf (dump_file, "Replacing exit test: ");
7023 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7025 compare = cp->comp;
7026 bound = unshare_expr (fold_convert (var_type, bound));
7027 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7028 if (stmts)
7029 gsi_insert_seq_on_edge_immediate (
7030 loop_preheader_edge (data->current_loop),
7031 stmts);
7033 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7034 gimple_cond_set_lhs (cond_stmt, var);
7035 gimple_cond_set_code (cond_stmt, compare);
7036 gimple_cond_set_rhs (cond_stmt, op);
7037 return;
7040 /* The induction variable elimination failed; just express the original
7041 giv. */
7042 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7043 gcc_assert (comp != NULL_TREE);
7044 gcc_assert (use->op_p != NULL);
7045 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7046 SSA_NAME_VAR (*use->op_p),
7047 true, GSI_SAME_STMT);
7050 /* Rewrite the groups using the selected induction variables. */
7052 static void
7053 rewrite_groups (struct ivopts_data *data)
7055 unsigned i, j;
7057 for (i = 0; i < data->vgroups.length (); i++)
7059 struct iv_group *group = data->vgroups[i];
7060 struct iv_cand *cand = group->selected;
7062 gcc_assert (cand);
7064 if (group->type == USE_NONLINEAR_EXPR)
7066 for (j = 0; j < group->vuses.length (); j++)
7068 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7069 update_stmt (group->vuses[j]->stmt);
7072 else if (group->type == USE_ADDRESS)
7074 for (j = 0; j < group->vuses.length (); j++)
7076 rewrite_use_address (data, group->vuses[j], cand);
7077 update_stmt (group->vuses[j]->stmt);
7080 else
7082 gcc_assert (group->type == USE_COMPARE);
7084 for (j = 0; j < group->vuses.length (); j++)
7086 rewrite_use_compare (data, group->vuses[j], cand);
7087 update_stmt (group->vuses[j]->stmt);
7093 /* Removes the ivs that are not used after rewriting. */
7095 static void
7096 remove_unused_ivs (struct ivopts_data *data)
7098 unsigned j;
7099 bitmap_iterator bi;
7100 bitmap toremove = BITMAP_ALLOC (NULL);
7102 /* Figure out an order in which to release SSA DEFs so that we don't
7103 release something that we'd have to propagate into a debug stmt
7104 afterwards. */
7105 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7107 struct version_info *info;
7109 info = ver_info (data, j);
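/* An iv can be removed if it really is an iv (nonzero step), is not also
   needed as a loop invariant, has no remaining nonlinear use, and is not a
   biv that must be preserved.  */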
7110 if (info->iv
7111 && !integer_zerop (info->iv->step)
7112 && !info->inv_id
7113 && !info->iv->nonlin_use
7114 && !info->preserve_biv)
7116 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7118 tree def = info->iv->ssa_name;
7120 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7122 imm_use_iterator imm_iter;
7123 use_operand_p use_p;
7124 gimple *stmt;
7125 int count = 0;
7127 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7129 if (!gimple_debug_bind_p (stmt))
7130 continue;
7132 /* We just want to determine whether to do nothing
7133 (count == 0), to substitute the computed
7134 expression into a single use of the SSA DEF by
7135 itself (count == 1), or to use a debug temp
7136 because the SSA DEF is used multiple times or as
7137 part of a larger expression (count > 1). */
7138 count++;
7139 if (gimple_debug_bind_get_value (stmt) != def)
7140 count++;
7142 if (count > 1)
7143 BREAK_FROM_IMM_USE_STMT (imm_iter);
7146 if (!count)
7147 continue;
7149 struct iv_use dummy_use;
7150 struct iv_cand *best_cand = NULL, *cand;
7151 unsigned i, best_pref = 0, cand_pref;
7153 memset (&dummy_use, 0, sizeof (dummy_use));
7154 dummy_use.iv = info->iv;
7155 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7157 cand = data->vgroups[i]->selected;
7158 if (cand == best_cand)
7159 continue;
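/* Score how well CAND can stand in for the removed iv in debug binds:
   +4 if its step matches, +2 if its base has the same machine mode, and
   +1 if its base is an integer constant; the best-scoring one is kept.  */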
7160 cand_pref = operand_equal_p (cand->iv->step,
7161 info->iv->step, 0)
7162 ? 4 : 0;
7163 cand_pref
7164 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7165 == TYPE_MODE (TREE_TYPE (info->iv->base))
7166 ? 2 : 0;
7167 cand_pref
7168 += TREE_CODE (cand->iv->base) == INTEGER_CST
7169 ? 1 : 0;
7170 if (best_cand == NULL || best_pref < cand_pref)
7172 best_cand = cand;
7173 best_pref = cand_pref;
7177 if (!best_cand)
7178 continue;
7180 tree comp = get_computation_at (data->current_loop,
7181 SSA_NAME_DEF_STMT (def),
7182 &dummy_use, best_cand);
7183 if (!comp)
7184 continue;
7186 if (count > 1)
7188 tree vexpr = make_node (DEBUG_EXPR_DECL);
7189 DECL_ARTIFICIAL (vexpr) = 1;
7190 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7191 if (SSA_NAME_VAR (def))
7192 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7193 else
7194 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7195 gdebug *def_temp
7196 = gimple_build_debug_bind (vexpr, comp, NULL);
7197 gimple_stmt_iterator gsi;
7199 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7200 gsi = gsi_after_labels (gimple_bb
7201 (SSA_NAME_DEF_STMT (def)));
7202 else
7203 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7205 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7206 comp = vexpr;
7209 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7211 if (!gimple_debug_bind_p (stmt))
7212 continue;
7214 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7215 SET_USE (use_p, comp);
7217 update_stmt (stmt);
7223 release_defs_bitset (toremove);
7225 BITMAP_FREE (toremove);
7228 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7229 for hash_map::traverse. */
7231 bool
7232 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7234 free (value);
7235 return true;
7238 /* Frees data allocated by the optimization of a single loop. */
7240 static void
7241 free_loop_data (struct ivopts_data *data)
7243 unsigned i, j;
7244 bitmap_iterator bi;
7245 tree obj;
7247 if (data->niters)
7249 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7250 delete data->niters;
7251 data->niters = NULL;
7254 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7256 struct version_info *info;
7258 info = ver_info (data, i);
7259 info->iv = NULL;
7260 info->has_nonlin_use = false;
7261 info->preserve_biv = false;
7262 info->inv_id = 0;
7264 bitmap_clear (data->relevant);
7265 bitmap_clear (data->important_candidates);
7267 for (i = 0; i < data->vgroups.length (); i++)
7269 struct iv_group *group = data->vgroups[i];
7271 for (j = 0; j < group->vuses.length (); j++)
7272 free (group->vuses[j]);
7273 group->vuses.release ();
7275 BITMAP_FREE (group->related_cands);
7276 for (j = 0; j < group->n_map_members; j++)
7278 if (group->cost_map[j].inv_vars)
7279 BITMAP_FREE (group->cost_map[j].inv_vars);
7280 if (group->cost_map[j].inv_exprs)
7281 BITMAP_FREE (group->cost_map[j].inv_exprs);
7284 free (group->cost_map);
7285 free (group);
7287 data->vgroups.truncate (0);
7289 for (i = 0; i < data->vcands.length (); i++)
7291 struct iv_cand *cand = data->vcands[i];
7293 if (cand->inv_vars)
7294 BITMAP_FREE (cand->inv_vars);
7295 if (cand->inv_exprs)
7296 BITMAP_FREE (cand->inv_exprs);
7297 free (cand);
7299 data->vcands.truncate (0);
7301 if (data->version_info_size < num_ssa_names)
7303 data->version_info_size = 2 * num_ssa_names;
7304 free (data->version_info);
7305 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7308 data->max_inv_var_id = 0;
7309 data->max_inv_expr_id = 0;
7311 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7312 SET_DECL_RTL (obj, NULL_RTX);
7314 decl_rtl_to_reset.truncate (0);
7316 data->inv_expr_tab->empty ();
7318 data->iv_common_cand_tab->empty ();
7319 data->iv_common_cands.truncate (0);
7322 /* Finalizes data structures used by the iv optimization pass. */
7325 static void
7326 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7328 free_loop_data (data);
7329 free (data->version_info);
7330 BITMAP_FREE (data->relevant);
7331 BITMAP_FREE (data->important_candidates);
7333 decl_rtl_to_reset.release ();
7334 data->vgroups.release ();
7335 data->vcands.release ();
7336 delete data->inv_expr_tab;
7337 data->inv_expr_tab = NULL;
7338 free_affine_expand_cache (&data->name_expansion_cache);
7339 delete data->iv_common_cand_tab;
7340 data->iv_common_cand_tab = NULL;
7341 data->iv_common_cands.release ();
7342 obstack_free (&data->iv_obstack, NULL);
7345 /* Returns true if the loop body BODY includes any function calls. */
7347 static bool
7348 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7350 gimple_stmt_iterator gsi;
7351 unsigned i;
7353 for (i = 0; i < num_nodes; i++)
7354 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7356 gimple *stmt = gsi_stmt (gsi);
7357 if (is_gimple_call (stmt)
7358 && !gimple_call_internal_p (stmt)
7359 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7360 return true;
7362 return false;
7365 /* Optimizes the LOOP. Returns true if anything changed. */
7367 static bool
7368 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7370 bool changed = false;
7371 struct iv_ca *iv_ca;
7372 edge exit = single_dom_exit (loop);
7373 basic_block *body;
7375 gcc_assert (!data->niters);
7376 data->current_loop = loop;
7377 data->loop_loc = find_loop_location (loop);
7378 data->speed = optimize_loop_for_speed_p (loop);
7380 if (dump_file && (dump_flags & TDF_DETAILS))
7382 fprintf (dump_file, "Processing loop %d", loop->num);
7383 if (data->loop_loc != UNKNOWN_LOCATION)
7384 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7385 LOCATION_LINE (data->loop_loc));
7386 fprintf (dump_file, "\n");
7388 if (exit)
7390 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7391 exit->src->index, exit->dest->index);
7392 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7393 fprintf (dump_file, "\n");
7396 fprintf (dump_file, "\n");
7399 body = get_loop_body (loop);
7400 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7401 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7402 free (body);
7404 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7406 /* For each ssa name determines whether it behaves as an induction variable
7407 in some loop. */
7408 if (!find_induction_variables (data))
7409 goto finish;
7411 /* Finds interesting uses (item 1). */
7412 find_interesting_uses (data);
7413 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7414 goto finish;
7416 /* Finds candidates for the induction variables (item 2). */
7417 find_iv_candidates (data);
7419 /* Calculates the costs (item 3, part 1). */
7420 determine_iv_costs (data);
7421 determine_group_iv_costs (data);
7422 determine_set_costs (data);
7424 /* Find the optimal set of induction variables (item 3, part 2). */
7425 iv_ca = find_optimal_iv_set (data);
7426 if (!iv_ca)
7427 goto finish;
7428 changed = true;
7430 /* Create the new induction variables (item 4, part 1). */
7431 create_new_ivs (data, iv_ca);
7432 iv_ca_free (&iv_ca);
7434 /* Rewrite the uses (item 4, part 2). */
7435 rewrite_groups (data);
7437 /* Remove the ivs that are unused after rewriting. */
7438 remove_unused_ivs (data);
7440 /* We have changed the structure of induction variables; it might happen
7441 that definitions in the scev database refer to some of them that were
7442 eliminated. */
7443 scev_reset ();
7445 finish:
7446 free_loop_data (data);
7448 return changed;
7451 /* Main entry point. Optimizes induction variables in loops. */
7453 void
7454 tree_ssa_iv_optimize (void)
7456 struct loop *loop;
7457 struct ivopts_data data;
7459 tree_ssa_iv_optimize_init (&data);
7461 /* Optimize the loops starting with the innermost ones. */
7462 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7464 if (dump_file && (dump_flags & TDF_DETAILS))
7465 flow_loop_dump (loop, dump_file, NULL, 1);
7467 tree_ssa_iv_optimize_loop (&data, loop);
7470 tree_ssa_iv_optimize_finalize (&data);
7473 #include "gt-tree-ssa-loop-ivopts.h"