gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2016 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33       Note the interesting uses are categorized and handled in groups.
  34       Generally, address type uses are grouped together if their iv bases
  35       are different in constant offset.
  36
  37    2) Candidates for the induction variables are found.  This includes
  38
  39       -- old induction variables
  40       -- the variables defined by expressions derived from the "interesting
  41          groups/uses" above
  42
  43    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
  44       cost function assigns a cost to sets of induction variables and consists
  45       of three parts:
  46
  47       -- The group/use costs.  Each of the interesting groups/uses chooses
  48          the best induction variable in the set and adds its cost to the sum.
  49          The cost reflects the time spent on modifying the induction variables
  50          value to be usable for the given purpose (adding base and offset for
  51          arrays, etc.).
  52       -- The variable costs.  Each of the variables has a cost assigned that
  53          reflects the costs associated with incrementing the value of the
  54          variable.  The original variables are somewhat preferred.
  55       -- The set cost.  Depending on the size of the set, extra cost may be
  56          added to reflect register pressure.
  57
  58       All the costs are defined in a machine-specific way, using the target
  59       hooks and machine descriptions to determine them.
  60
  61    4) The trees are transformed to use the new variables, the dead code is
  62       removed.
  63
  64    All of this is done loop by loop.  Doing it globally is theoretically
  65    possible, it might give a better performance and it might enable us
  66    to decide costs more precisely, but getting all the interactions right
  67    would be complicated.  */
  68
  69 #include "config.h"
  70 #include "system.h"
  71 #include "coretypes.h"
  72 #include "backend.h"
  73 #include "rtl.h"
  74 #include "tree.h"
  75 #include "gimple.h"
  76 #include "cfghooks.h"
  77 #include "tree-pass.h"
  78 #include "tm_p.h"
  79 #include "ssa.h"
  80 #include "expmed.h"
  81 #include "insn-config.h"
  82 #include "emit-rtl.h"
  83 #include "recog.h"
  84 #include "cgraph.h"
  85 #include "gimple-pretty-print.h"
  86 #include "alias.h"
  87 #include "fold-const.h"
  88 #include "stor-layout.h"
  89 #include "tree-eh.h"
  90 #include "gimplify.h"
  91 #include "gimple-iterator.h"
  92 #include "gimplify-me.h"
  93 #include "tree-cfg.h"
  94 #include "tree-ssa-loop-ivopts.h"
  95 #include "tree-ssa-loop-manip.h"
  96 #include "tree-ssa-loop-niter.h"
  97 #include "tree-ssa-loop.h"
  98 #include "explow.h"
  99 #include "expr.h"
 100 #include "tree-dfa.h"
 101 #include "tree-ssa.h"
 102 #include "cfgloop.h"
 103 #include "tree-scalar-evolution.h"
 104 #include "params.h"
 105 #include "tree-affine.h"
 106 #include "tree-ssa-propagate.h"
 107 #include "tree-ssa-address.h"
 108 #include "builtins.h"
 109 #include "tree-vectorizer.h"
 110
 111 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 112    cost of different addressing modes.  This should be moved to a TBD
 113    interface between the GIMPLE and RTL worlds.  */
 114
 115 /* The infinite cost.  */
 116 #define INFTY 10000000
 117
 118 /* Returns the expected number of loop iterations for LOOP.
 119    The average trip count is computed from profile data if it
 120    exists. */
 121
 122 static inline HOST_WIDE_INT
 123 avg_loop_niter (struct loop *loop)
 124 {
 125   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 126   if (niter == -1)
 127     {
 128       niter = likely_max_stmt_executions_int (loop);
 129
 130       if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
 131         return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
 132     }
 133
 134   return niter;
 135 }
 136
 137 struct iv_use;
 138
 139 /* Representation of the induction variable.  */
 140 struct iv
 141 {
 142   tree base;            /* Initial value of the iv.  */
 143   tree base_object;     /* A memory object to that the induction variable points.  */
 144   tree step;            /* Step of the iv (constant only).  */
 145   tree ssa_name;        /* The ssa name with the value.  */
 146   struct iv_use *nonlin_use;    /* The identifier in the use if it is the case.  */
 147   bool biv_p;           /* Is it a biv?  */
 148   bool no_overflow;     /* True if the iv doesn't overflow.  */
 149   bool have_address_use;/* For biv, indicate if it's used in any address
 150                            type use.  */
 151 };
 152
 153 /* Per-ssa version information (induction variable descriptions, etc.).  */
 154 struct version_info
 155 {
 156   tree name;            /* The ssa name.  */
 157   struct iv *iv;        /* Induction variable description.  */
 158   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 159                            an expression that is not an induction variable.  */
 160   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 161   unsigned inv_id;      /* Id of an invariant.  */
 162 };
 163
 164 /* Types of uses.  */
 165 enum use_type
 166 {
 167   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 168   USE_ADDRESS,          /* Use in an address.  */
 169   USE_COMPARE           /* Use is a compare.  */
 170 };
 171
 172 /* Cost of a computation.  */
 173 struct comp_cost
 174 {
 175   comp_cost (): cost (0), complexity (0), scratch (0)
 176   {}
 177
 178   comp_cost (int cost, unsigned complexity, int scratch = 0)
 179     : cost (cost), complexity (complexity), scratch (scratch)
 180   {}
 181
 182   /* Returns true if COST is infinite.  */
 183   bool infinite_cost_p ();
 184
 185   /* Adds costs COST1 and COST2.  */
 186   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
 187
 188   /* Adds COST to the comp_cost.  */
 189   comp_cost operator+= (comp_cost cost);
 190
 191   /* Adds constant C to this comp_cost.  */
 192   comp_cost operator+= (HOST_WIDE_INT c);
 193
 194   /* Subtracts constant C to this comp_cost.  */
 195   comp_cost operator-= (HOST_WIDE_INT c);
 196
 197   /* Divide the comp_cost by constant C.  */
 198   comp_cost operator/= (HOST_WIDE_INT c);
 199
 200   /* Multiply the comp_cost by constant C.  */
 201   comp_cost operator*= (HOST_WIDE_INT c);
 202
 203   /* Subtracts costs COST1 and COST2.  */
 204   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
 205
 206   /* Subtracts COST from this comp_cost.  */
 207   comp_cost operator-= (comp_cost cost);
 208
 209   /* Returns true if COST1 is smaller than COST2.  */
 210   friend bool operator< (comp_cost cost1, comp_cost cost2);
 211
 212   /* Returns true if COST1 and COST2 are equal.  */
 213   friend bool operator== (comp_cost cost1, comp_cost cost2);
 214
 215   /* Returns true if COST1 is smaller or equal than COST2.  */
 216   friend bool operator<= (comp_cost cost1, comp_cost cost2);
 217
 218   int cost;             /* The runtime cost.  */
 219   unsigned complexity;  /* The estimate of the complexity of the code for
 220                            the computation (in no concrete units --
 221                            complexity field should be larger for more
 222                            complex expressions and addressing modes).  */
 223   int scratch;          /* Scratch used during cost computation.  */
 224 };
 225
 226 static const comp_cost no_cost;
 227 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
 228
 229 bool
 230 comp_cost::infinite_cost_p ()
 231 {
 232   return cost == INFTY;
 233 }
 234
 235 comp_cost
 236 operator+ (comp_cost cost1, comp_cost cost2)
 237 {
 238   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
 239     return infinite_cost;
 240
 241   cost1.cost += cost2.cost;
 242   cost1.complexity += cost2.complexity;
 243
 244   return cost1;
 245 }
 246
 247 comp_cost
 248 operator- (comp_cost cost1, comp_cost cost2)
 249 {
 250   if (cost1.infinite_cost_p ())
 251     return infinite_cost;
 252
 253   gcc_assert (!cost2.infinite_cost_p ());
 254
 255   cost1.cost -= cost2.cost;
 256   cost1.complexity -= cost2.complexity;
 257
 258   return cost1;
 259 }
 260
 261 comp_cost
 262 comp_cost::operator+= (comp_cost cost)
 263 {
 264   *this = *this + cost;
 265   return *this;
 266 }
 267
 268 comp_cost
 269 comp_cost::operator+= (HOST_WIDE_INT c)
 270 {
 271   if (infinite_cost_p ())
 272     return *this;
 273
 274   this->cost += c;
 275
 276   return *this;
 277 }
 278
 279 comp_cost
 280 comp_cost::operator-= (HOST_WIDE_INT c)
 281 {
 282   if (infinite_cost_p ())
 283     return *this;
 284
 285   this->cost -= c;
 286
 287   return *this;
 288 }
 289
 290 comp_cost
 291 comp_cost::operator/= (HOST_WIDE_INT c)
 292 {
 293   if (infinite_cost_p ())
 294     return *this;
 295
 296   this->cost /= c;
 297
 298   return *this;
 299 }
 300
 301 comp_cost
 302 comp_cost::operator*= (HOST_WIDE_INT c)
 303 {
 304   if (infinite_cost_p ())
 305     return *this;
 306
 307   this->cost *= c;
 308
 309   return *this;
 310 }
 311
 312 comp_cost
 313 comp_cost::operator-= (comp_cost cost)
 314 {
 315   *this = *this - cost;
 316   return *this;
 317 }
 318
 319 bool
 320 operator< (comp_cost cost1, comp_cost cost2)
 321 {
 322   if (cost1.cost == cost2.cost)
 323     return cost1.complexity < cost2.complexity;
 324
 325   return cost1.cost < cost2.cost;
 326 }
 327
 328 bool
 329 operator== (comp_cost cost1, comp_cost cost2)
 330 {
 331   return cost1.cost == cost2.cost
 332     && cost1.complexity == cost2.complexity;
 333 }
 334
 335 bool
 336 operator<= (comp_cost cost1, comp_cost cost2)
 337 {
 338   return cost1 < cost2 || cost1 == cost2;
 339 }
 340
 341 struct iv_inv_expr_ent;
 342
 343 /* The candidate - cost pair.  */
 344 struct cost_pair
 345 {
 346   struct iv_cand *cand; /* The candidate.  */
 347   comp_cost cost;       /* The cost.  */
 348   bitmap depends_on;    /* The list of invariants that have to be
 349                            preserved.  */
 350   tree value;           /* For final value elimination, the expression for
 351                            the final value of the iv.  For iv elimination,
 352                            the new bound to compare with.  */
 353   enum tree_code comp;  /* For iv elimination, the comparison.  */
 354   iv_inv_expr_ent *inv_expr; /* Loop invariant expression.  */
 355 };
 356
 357 /* Use.  */
 358 struct iv_use
 359 {
 360   unsigned id;          /* The id of the use.  */
 361   unsigned group_id;    /* The group id the use belongs to.  */
 362   enum use_type type;   /* Type of the use.  */
 363   struct iv *iv;        /* The induction variable it is based on.  */
 364   gimple *stmt;         /* Statement in that it occurs.  */
 365   tree *op_p;           /* The place where it occurs.  */
 366
 367   tree addr_base;       /* Base address with const offset stripped.  */
 368   unsigned HOST_WIDE_INT addr_offset;
 369                         /* Const offset stripped from base address.  */
 370 };
 371
 372 /* Group of uses.  */
 373 struct iv_group
 374 {
 375   /* The id of the group.  */
 376   unsigned id;
 377   /* Uses of the group are of the same type.  */
 378   enum use_type type;
 379   /* The set of "related" IV candidates, plus the important ones.  */
 380   bitmap related_cands;
 381   /* Number of IV candidates in the cost_map.  */
 382   unsigned n_map_members;
 383   /* The costs wrto the iv candidates.  */
 384   struct cost_pair *cost_map;
 385   /* The selected candidate for the group.  */
 386   struct iv_cand *selected;
 387   /* Uses in the group.  */
 388   vec<struct iv_use *> vuses;
 389 };
 390
 391 /* The position where the iv is computed.  */
 392 enum iv_position
 393 {
 394   IP_NORMAL,            /* At the end, just before the exit condition.  */
 395   IP_END,               /* At the end of the latch block.  */
 396   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 397   IP_AFTER_USE,         /* Immediately after a specific use.  */
 398   IP_ORIGINAL           /* The original biv.  */
 399 };
 400
 401 /* The induction variable candidate.  */
 402 struct iv_cand
 403 {
 404   unsigned id;          /* The number of the candidate.  */
 405   bool important;       /* Whether this is an "important" candidate, i.e. such
 406                            that it should be considered by all uses.  */
 407   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 408   gimple *incremented_at;/* For original biv, the statement where it is
 409                            incremented.  */
 410   tree var_before;      /* The variable used for it before increment.  */
 411   tree var_after;       /* The variable used for it after increment.  */
 412   struct iv *iv;        /* The value of the candidate.  NULL for
 413                            "pseudocandidate" used to indicate the possibility
 414                            to replace the final value of an iv by direct
 415                            computation of the value.  */
 416   unsigned cost;        /* Cost of the candidate.  */
 417   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 418   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 419                               where it is incremented.  */
 420   bitmap depends_on;    /* The list of invariants that are used in step of the
 421                            biv.  */
 422   struct iv *orig_iv;   /* The original iv if this cand is added from biv with
 423                            smaller type.  */
 424 };
 425
 426 /* Hashtable entry for common candidate derived from iv uses.  */
 427 struct iv_common_cand
 428 {
 429   tree base;
 430   tree step;
 431   /* IV uses from which this common candidate is derived.  */
 432   auto_vec<struct iv_use *> uses;
 433   hashval_t hash;
 434 };
 435
 436 /* Hashtable helpers.  */
 437
 438 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
 439 {
 440   static inline hashval_t hash (const iv_common_cand *);
 441   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
 442 };
 443
 444 /* Hash function for possible common candidates.  */
 445
 446 inline hashval_t
 447 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
 448 {
 449   return ccand->hash;
 450 }
 451
 452 /* Hash table equality function for common candidates.  */
 453
 454 inline bool
 455 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
 456                               const iv_common_cand *ccand2)
 457 {
 458   return (ccand1->hash == ccand2->hash
 459           && operand_equal_p (ccand1->base, ccand2->base, 0)
 460           && operand_equal_p (ccand1->step, ccand2->step, 0)
 461           && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
 462               == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
 463 }
 464
 465 /* Loop invariant expression hashtable entry.  */
 466
 467 struct iv_inv_expr_ent
 468 {
 469   /* Tree expression of the entry.  */
 470   tree expr;
 471   /* Unique indentifier.  */
 472   int id;
 473   /* Hash value.  */
 474   hashval_t hash;
 475 };
 476
 477 /* Sort iv_inv_expr_ent pair A and B by id field.  */
 478
 479 static int
 480 sort_iv_inv_expr_ent (const void *a, const void *b)
 481 {
 482   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
 483   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
 484
 485   unsigned id1 = (*e1)->id;
 486   unsigned id2 = (*e2)->id;
 487
 488   if (id1 < id2)
 489     return -1;
 490   else if (id1 > id2)
 491     return 1;
 492   else
 493     return 0;
 494 }
 495
 496 /* Hashtable helpers.  */
 497
 498 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
 499 {
 500   static inline hashval_t hash (const iv_inv_expr_ent *);
 501   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 502 };
 503
 504 /* Hash function for loop invariant expressions.  */
 505
 506 inline hashval_t
 507 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 508 {
 509   return expr->hash;
 510 }
 511
 512 /* Hash table equality function for expressions.  */
 513
 514 inline bool
 515 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 516                            const iv_inv_expr_ent *expr2)
 517 {
 518   return expr1->hash == expr2->hash
 519          && operand_equal_p (expr1->expr, expr2->expr, 0);
 520 }
 521
 522 struct ivopts_data
 523 {
 524   /* The currently optimized loop.  */
 525   struct loop *current_loop;
 526   source_location loop_loc;
 527
 528   /* Numbers of iterations for all exits of the current loop.  */
 529   hash_map<edge, tree_niter_desc *> *niters;
 530
 531   /* Number of registers used in it.  */
 532   unsigned regs_used;
 533
 534   /* The size of version_info array allocated.  */
 535   unsigned version_info_size;
 536
 537   /* The array of information for the ssa names.  */
 538   struct version_info *version_info;
 539
 540   /* The hashtable of loop invariant expressions created
 541      by ivopt.  */
 542   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 543
 544   /* Loop invariant expression id.  */
 545   int max_inv_expr_id;
 546
 547   /* The bitmap of indices in version_info whose value was changed.  */
 548   bitmap relevant;
 549
 550   /* The uses of induction variables.  */
 551   vec<iv_group *> vgroups;
 552
 553   /* The candidates.  */
 554   vec<iv_cand *> vcands;
 555
 556   /* A bitmap of important candidates.  */
 557   bitmap important_candidates;
 558
 559   /* Cache used by tree_to_aff_combination_expand.  */
 560   hash_map<tree, name_expansion *> *name_expansion_cache;
 561
 562   /* The hashtable of common candidates derived from iv uses.  */
 563   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
 564
 565   /* The common candidates.  */
 566   vec<iv_common_cand *> iv_common_cands;
 567
 568   /* The maximum invariant id.  */
 569   unsigned max_inv_id;
 570
 571   /* Number of no_overflow BIVs which are not used in memory address.  */
 572   unsigned bivs_not_used_in_addr;
 573
 574   /* Obstack for iv structure.  */
 575   struct obstack iv_obstack;
 576
 577   /* Whether to consider just related and important candidates when replacing a
 578      use.  */
 579   bool consider_all_candidates;
 580
 581   /* Are we optimizing for speed?  */
 582   bool speed;
 583
 584   /* Whether the loop body includes any function calls.  */
 585   bool body_includes_call;
 586
 587   /* Whether the loop body can only be exited via single exit.  */
 588   bool loop_single_exit_p;
 589 };
 590
 591 /* An assignment of iv candidates to uses.  */
 592
 593 struct iv_ca
 594 {
 595   /* The number of uses covered by the assignment.  */
 596   unsigned upto;
 597
 598   /* Number of uses that cannot be expressed by the candidates in the set.  */
 599   unsigned bad_groups;
 600
 601   /* Candidate assigned to a use, together with the related costs.  */
 602   struct cost_pair **cand_for_group;
 603
 604   /* Number of times each candidate is used.  */
 605   unsigned *n_cand_uses;
 606
 607   /* The candidates used.  */
 608   bitmap cands;
 609
 610   /* The number of candidates in the set.  */
 611   unsigned n_cands;
 612
 613   /* Total number of registers needed.  */
 614   unsigned n_regs;
 615
 616   /* Total cost of expressing uses.  */
 617   comp_cost cand_use_cost;
 618
 619   /* Total cost of candidates.  */
 620   unsigned cand_cost;
 621
 622   /* Number of times each invariant is used.  */
 623   unsigned *n_invariant_uses;
 624
 625   /* Hash set with used invariant expression.  */
 626   hash_map <iv_inv_expr_ent *, unsigned> *used_inv_exprs;
 627
 628   /* Total cost of the assignment.  */
 629   comp_cost cost;
 630 };
 631
 632 /* Difference of two iv candidate assignments.  */
 633
 634 struct iv_ca_delta
 635 {
 636   /* Changed group.  */
 637   struct iv_group *group;
 638
 639   /* An old assignment (for rollback purposes).  */
 640   struct cost_pair *old_cp;
 641
 642   /* A new assignment.  */
 643   struct cost_pair *new_cp;
 644
 645   /* Next change in the list.  */
 646   struct iv_ca_delta *next;
 647 };
 648
 649 /* Bound on number of candidates below that all candidates are considered.  */
 650
 651 #define CONSIDER_ALL_CANDIDATES_BOUND \
 652   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 653
 654 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 655    optimizing such a loop would help, and it would take ages).  */
 656
 657 #define MAX_CONSIDERED_GROUPS \
 658   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 659
 660 /* If there are at most this number of ivs in the set, try removing unnecessary
 661    ivs from the set always.  */
 662
 663 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 664   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 665
 666 /* The list of trees for that the decl_rtl field must be reset is stored
 667    here.  */
 668
 669 static vec<tree> decl_rtl_to_reset;
 670
 671 static comp_cost force_expr_to_var_cost (tree, bool);
 672
 673 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 674
 675 edge
 676 single_dom_exit (struct loop *loop)
 677 {
 678   edge exit = single_exit (loop);
 679
 680   if (!exit)
 681     return NULL;
 682
 683   if (!just_once_each_iteration_p (loop, exit->src))
 684     return NULL;
 685
 686   return exit;
 687 }
 688
 689 /* Dumps information about the induction variable IV to FILE.  Don't dump
 690    variable's name if DUMP_NAME is FALSE.  The information is dumped with
 691    preceding spaces indicated by INDENT_LEVEL.  */
 692
 693 void
 694 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
 695 {
 696   const char *p;
 697   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
 698
 699   if (indent_level > 4)
 700     indent_level = 4;
 701   p = spaces + 8 - (indent_level << 1);
 702
 703   fprintf (file, "%sIV struct:\n", p);
 704   if (iv->ssa_name && dump_name)
 705     {
 706       fprintf (file, "%s  SSA_NAME:\t", p);
 707       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 708       fprintf (file, "\n");
 709     }
 710
 711   fprintf (file, "%s  Type:\t", p);
 712   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 713   fprintf (file, "\n");
 714
 715   fprintf (file, "%s  Base:\t", p);
 716   print_generic_expr (file, iv->base, TDF_SLIM);
 717   fprintf (file, "\n");
 718
 719   fprintf (file, "%s  Step:\t", p);
 720   print_generic_expr (file, iv->step, TDF_SLIM);
 721   fprintf (file, "\n");
 722
 723   if (iv->base_object)
 724     {
 725       fprintf (file, "%s  Object:\t", p);
 726       print_generic_expr (file, iv->base_object, TDF_SLIM);
 727       fprintf (file, "\n");
 728     }
 729
 730   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
 731
 732   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
 733            p, iv->no_overflow ? "No-overflow" : "Overflow");
 734 }
 735
 736 /* Dumps information about the USE to FILE.  */
 737
 738 void
 739 dump_use (FILE *file, struct iv_use *use)
 740 {
 741   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
 742   fprintf (file, "    At stmt:\t");
 743   print_gimple_stmt (file, use->stmt, 0, 0);
 744   fprintf (file, "    At pos:\t");
 745   if (use->op_p)
 746     print_generic_expr (file, *use->op_p, TDF_SLIM);
 747   fprintf (file, "\n");
 748   dump_iv (file, use->iv, false, 2);
 749 }
 750
 751 /* Dumps information about the uses to FILE.  */
 752
 753 void
 754 dump_groups (FILE *file, struct ivopts_data *data)
 755 {
 756   unsigned i, j;
 757   struct iv_group *group;
 758
 759   for (i = 0; i < data->vgroups.length (); i++)
 760     {
 761       group = data->vgroups[i];
 762       fprintf (file, "Group %d:\n", group->id);
 763       if (group->type == USE_NONLINEAR_EXPR)
 764         fprintf (file, "  Type:\tGENERIC\n");
 765       else if (group->type == USE_ADDRESS)
 766         fprintf (file, "  Type:\tADDRESS\n");
 767       else
 768         {
 769           gcc_assert (group->type == USE_COMPARE);
 770           fprintf (file, "  Type:\tCOMPARE\n");
 771         }
 772       for (j = 0; j < group->vuses.length (); j++)
 773         dump_use (file, group->vuses[j]);
 774     }
 775 }
 776
 777 /* Dumps information about induction variable candidate CAND to FILE.  */
 778
 779 void
 780 dump_cand (FILE *file, struct iv_cand *cand)
 781 {
 782   struct iv *iv = cand->iv;
 783
 784   fprintf (file, "Candidate %d:\n", cand->id);
 785   if (cand->depends_on)
 786     {
 787       fprintf (file, "  Depend on: ");
 788       dump_bitmap (file, cand->depends_on);
 789     }
 790
 791   if (cand->var_before)
 792     {
 793       fprintf (file, "  Var befor: ");
 794       print_generic_expr (file, cand->var_before, TDF_SLIM);
 795       fprintf (file, "\n");
 796     }
 797   if (cand->var_after)
 798     {
 799       fprintf (file, "  Var after: ");
 800       print_generic_expr (file, cand->var_after, TDF_SLIM);
 801       fprintf (file, "\n");
 802     }
 803
 804   switch (cand->pos)
 805     {
 806     case IP_NORMAL:
 807       fprintf (file, "  Incr POS: before exit test\n");
 808       break;
 809
 810     case IP_BEFORE_USE:
 811       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
 812       break;
 813
 814     case IP_AFTER_USE:
 815       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
 816       break;
 817
 818     case IP_END:
 819       fprintf (file, "  Incr POS: at end\n");
 820       break;
 821
 822     case IP_ORIGINAL:
 823       fprintf (file, "  Incr POS: orig biv\n");
 824       break;
 825     }
 826
 827   dump_iv (file, iv, false, 1);
 828 }
 829
 830 /* Returns the info for ssa version VER.  */
 831
 832 static inline struct version_info *
 833 ver_info (struct ivopts_data *data, unsigned ver)
 834 {
 835   return data->version_info + ver;
 836 }
 837
 838 /* Returns the info for ssa name NAME.  */
 839
 840 static inline struct version_info *
 841 name_info (struct ivopts_data *data, tree name)
 842 {
 843   return ver_info (data, SSA_NAME_VERSION (name));
 844 }
 845
 846 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 847    emitted in LOOP.  */
 848
 849 static bool
 850 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
 851 {
 852   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 853
 854   gcc_assert (bb);
 855
 856   if (sbb == loop->latch)
 857     return true;
 858
 859   if (sbb != bb)
 860     return false;
 861
 862   return stmt == last_stmt (bb);
 863 }
 864
 865 /* Returns true if STMT if after the place where the original induction
 866    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 867    if the positions are identical.  */
 868
 869 static bool
 870 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
 871 {
 872   basic_block cand_bb = gimple_bb (cand->incremented_at);
 873   basic_block stmt_bb = gimple_bb (stmt);
 874
 875   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 876     return false;
 877
 878   if (stmt_bb != cand_bb)
 879     return true;
 880
 881   if (true_if_equal
 882       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 883     return true;
 884   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 885 }
 886
 887 /* Returns true if STMT if after the place where the induction variable
 888    CAND is incremented in LOOP.  */
 889
 890 static bool
 891 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
 892 {
 893   switch (cand->pos)
 894     {
 895     case IP_END:
 896       return false;
 897
 898     case IP_NORMAL:
 899       return stmt_after_ip_normal_pos (loop, stmt);
 900
 901     case IP_ORIGINAL:
 902     case IP_AFTER_USE:
 903       return stmt_after_inc_pos (cand, stmt, false);
 904
 905     case IP_BEFORE_USE:
 906       return stmt_after_inc_pos (cand, stmt, true);
 907
 908     default:
 909       gcc_unreachable ();
 910     }
 911 }
 912
 913 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 914
 915 static bool
 916 abnormal_ssa_name_p (tree exp)
 917 {
 918   if (!exp)
 919     return false;
 920
 921   if (TREE_CODE (exp) != SSA_NAME)
 922     return false;
 923
 924   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 925 }
 926
 927 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 928    abnormal phi node.  Callback for for_each_index.  */
 929
 930 static bool
 931 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 932                                   void *data ATTRIBUTE_UNUSED)
 933 {
 934   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 935     {
 936       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 937         return false;
 938       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 939         return false;
 940     }
 941
 942   return !abnormal_ssa_name_p (*index);
 943 }
 944
 945 /* Returns true if EXPR contains a ssa name that occurs in an
 946    abnormal phi node.  */
 947
 948 bool
 949 contains_abnormal_ssa_name_p (tree expr)
 950 {
 951   enum tree_code code;
 952   enum tree_code_class codeclass;
 953
 954   if (!expr)
 955     return false;
 956
 957   code = TREE_CODE (expr);
 958   codeclass = TREE_CODE_CLASS (code);
 959
 960   if (code == SSA_NAME)
 961     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 962
 963   if (code == INTEGER_CST
 964       || is_gimple_min_invariant (expr))
 965     return false;
 966
 967   if (code == ADDR_EXPR)
 968     return !for_each_index (&TREE_OPERAND (expr, 0),
 969                             idx_contains_abnormal_ssa_name_p,
 970                             NULL);
 971
 972   if (code == COND_EXPR)
 973     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 974       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 975       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 976
 977   switch (codeclass)
 978     {
 979     case tcc_binary:
 980     case tcc_comparison:
 981       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 982         return true;
 983
 984       /* Fallthru.  */
 985     case tcc_unary:
 986       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 987         return true;
 988
 989       break;
 990
 991     default:
 992       gcc_unreachable ();
 993     }
 994
 995   return false;
 996 }
 997
 998 /*  Returns the structure describing number of iterations determined from
 999     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1000
1001 static struct tree_niter_desc *
1002 niter_for_exit (struct ivopts_data *data, edge exit)
1003 {
1004   struct tree_niter_desc *desc;
1005   tree_niter_desc **slot;
1006
1007   if (!data->niters)
1008     {
1009       data->niters = new hash_map<edge, tree_niter_desc *>;
1010       slot = NULL;
1011     }
1012   else
1013     slot = data->niters->get (exit);
1014
1015   if (!slot)
1016     {
1017       /* Try to determine number of iterations.  We cannot safely work with ssa
1018          names that appear in phi nodes on abnormal edges, so that we do not
1019          create overlapping life ranges for them (PR 27283).  */
1020       desc = XNEW (struct tree_niter_desc);
1021       if (!number_of_iterations_exit (data->current_loop,
1022                                       exit, desc, true)
1023           || contains_abnormal_ssa_name_p (desc->niter))
1024         {
1025           XDELETE (desc);
1026           desc = NULL;
1027         }
1028       data->niters->put (exit, desc);
1029     }
1030   else
1031     desc = *slot;
1032
1033   return desc;
1034 }
1035
1036 /* Returns the structure describing number of iterations determined from
1037    single dominating exit of DATA->current_loop, or NULL if something
1038    goes wrong.  */
1039
1040 static struct tree_niter_desc *
1041 niter_for_single_dom_exit (struct ivopts_data *data)
1042 {
1043   edge exit = single_dom_exit (data->current_loop);
1044
1045   if (!exit)
1046     return NULL;
1047
1048   return niter_for_exit (data, exit);
1049 }
1050
1051 /* Initializes data structures used by the iv optimization pass, stored
1052    in DATA.  */
1053
1054 static void
1055 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1056 {
1057   data->version_info_size = 2 * num_ssa_names;
1058   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1059   data->relevant = BITMAP_ALLOC (NULL);
1060   data->important_candidates = BITMAP_ALLOC (NULL);
1061   data->max_inv_id = 0;
1062   data->niters = NULL;
1063   data->vgroups.create (20);
1064   data->vcands.create (20);
1065   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1066   data->max_inv_expr_id = 0;
1067   data->name_expansion_cache = NULL;
1068   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1069   data->iv_common_cands.create (20);
1070   decl_rtl_to_reset.create (20);
1071   gcc_obstack_init (&data->iv_obstack);
1072 }
1073
1074 /* Returns a memory object to that EXPR points.  In case we are able to
1075    determine that it does not point to any such object, NULL is returned.  */
1076
1077 static tree
1078 determine_base_object (tree expr)
1079 {
1080   enum tree_code code = TREE_CODE (expr);
1081   tree base, obj;
1082
1083   /* If this is a pointer casted to any type, we need to determine
1084      the base object for the pointer; so handle conversions before
1085      throwing away non-pointer expressions.  */
1086   if (CONVERT_EXPR_P (expr))
1087     return determine_base_object (TREE_OPERAND (expr, 0));
1088
1089   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1090     return NULL_TREE;
1091
1092   switch (code)
1093     {
1094     case INTEGER_CST:
1095       return NULL_TREE;
1096
1097     case ADDR_EXPR:
1098       obj = TREE_OPERAND (expr, 0);
1099       base = get_base_address (obj);
1100
1101       if (!base)
1102         return expr;
1103
1104       if (TREE_CODE (base) == MEM_REF)
1105         return determine_base_object (TREE_OPERAND (base, 0));
1106
1107       return fold_convert (ptr_type_node,
1108                            build_fold_addr_expr (base));
1109
1110     case POINTER_PLUS_EXPR:
1111       return determine_base_object (TREE_OPERAND (expr, 0));
1112
1113     case PLUS_EXPR:
1114     case MINUS_EXPR:
1115       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
1116       gcc_unreachable ();
1117
1118     default:
1119       return fold_convert (ptr_type_node, expr);
1120     }
1121 }
1122
1123 /* Return true if address expression with non-DECL_P operand appears
1124    in EXPR.  */
1125
1126 static bool
1127 contain_complex_addr_expr (tree expr)
1128 {
1129   bool res = false;
1130
1131   STRIP_NOPS (expr);
1132   switch (TREE_CODE (expr))
1133     {
1134     case POINTER_PLUS_EXPR:
1135     case PLUS_EXPR:
1136     case MINUS_EXPR:
1137       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1138       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1139       break;
1140
1141     case ADDR_EXPR:
1142       return (!DECL_P (TREE_OPERAND (expr, 0)));
1143
1144     default:
1145       return false;
1146     }
1147
1148   return res;
1149 }
1150
1151 /* Allocates an induction variable with given initial value BASE and step STEP
1152    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1153
1154 static struct iv *
1155 alloc_iv (struct ivopts_data *data, tree base, tree step,
1156           bool no_overflow = false)
1157 {
1158   tree expr = base;
1159   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1160                                               sizeof (struct iv));
1161   gcc_assert (step != NULL_TREE);
1162
1163   /* Lower address expression in base except ones with DECL_P as operand.
1164      By doing this:
1165        1) More accurate cost can be computed for address expressions;
1166        2) Duplicate candidates won't be created for bases in different
1167           forms, like &a[0] and &a.  */
1168   STRIP_NOPS (expr);
1169   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1170       || contain_complex_addr_expr (expr))
1171     {
1172       aff_tree comb;
1173       tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1174       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1175     }
1176
1177   iv->base = base;
1178   iv->base_object = determine_base_object (base);
1179   iv->step = step;
1180   iv->biv_p = false;
1181   iv->nonlin_use = NULL;
1182   iv->ssa_name = NULL_TREE;
1183   if (!no_overflow
1184        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1185                               base, step))
1186     no_overflow = true;
1187   iv->no_overflow = no_overflow;
1188   iv->have_address_use = false;
1189
1190   return iv;
1191 }
1192
1193 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1194    doesn't overflow.  */
1195
1196 static void
1197 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1198         bool no_overflow)
1199 {
1200   struct version_info *info = name_info (data, iv);
1201
1202   gcc_assert (!info->iv);
1203
1204   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1205   info->iv = alloc_iv (data, base, step, no_overflow);
1206   info->iv->ssa_name = iv;
1207 }
1208
1209 /* Finds induction variable declaration for VAR.  */
1210
1211 static struct iv *
1212 get_iv (struct ivopts_data *data, tree var)
1213 {
1214   basic_block bb;
1215   tree type = TREE_TYPE (var);
1216
1217   if (!POINTER_TYPE_P (type)
1218       && !INTEGRAL_TYPE_P (type))
1219     return NULL;
1220
1221   if (!name_info (data, var)->iv)
1222     {
1223       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1224
1225       if (!bb
1226           || !flow_bb_inside_loop_p (data->current_loop, bb))
1227         set_iv (data, var, var, build_int_cst (type, 0), true);
1228     }
1229
1230   return name_info (data, var)->iv;
1231 }
1232
1233 /* Return the first non-invariant ssa var found in EXPR.  */
1234
1235 static tree
1236 extract_single_var_from_expr (tree expr)
1237 {
1238   int i, n;
1239   tree tmp;
1240   enum tree_code code;
1241
1242   if (!expr || is_gimple_min_invariant (expr))
1243     return NULL;
1244
1245   code = TREE_CODE (expr);
1246   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1247     {
1248       n = TREE_OPERAND_LENGTH (expr);
1249       for (i = 0; i < n; i++)
1250         {
1251           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1252
1253           if (tmp)
1254             return tmp;
1255         }
1256     }
1257   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1258 }
1259
1260 /* Finds basic ivs.  */
1261
1262 static bool
1263 find_bivs (struct ivopts_data *data)
1264 {
1265   gphi *phi;
1266   affine_iv iv;
1267   tree step, type, base, stop;
1268   bool found = false;
1269   struct loop *loop = data->current_loop;
1270   gphi_iterator psi;
1271
1272   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1273     {
1274       phi = psi.phi ();
1275
1276       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1277         continue;
1278
1279       if (virtual_operand_p (PHI_RESULT (phi)))
1280         continue;
1281
1282       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1283         continue;
1284
1285       if (integer_zerop (iv.step))
1286         continue;
1287
1288       step = iv.step;
1289       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1290       /* Stop expanding iv base at the first ssa var referred by iv step.
1291          Ideally we should stop at any ssa var, because that's expensive
1292          and unusual to happen, we just do it on the first one.
1293
1294          See PR64705 for the rationale.  */
1295       stop = extract_single_var_from_expr (step);
1296       base = expand_simple_operations (base, stop);
1297       if (contains_abnormal_ssa_name_p (base)
1298           || contains_abnormal_ssa_name_p (step))
1299         continue;
1300
1301       type = TREE_TYPE (PHI_RESULT (phi));
1302       base = fold_convert (type, base);
1303       if (step)
1304         {
1305           if (POINTER_TYPE_P (type))
1306             step = convert_to_ptrofftype (step);
1307           else
1308             step = fold_convert (type, step);
1309         }
1310
1311       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1312       found = true;
1313     }
1314
1315   return found;
1316 }
1317
1318 /* Marks basic ivs.  */
1319
1320 static void
1321 mark_bivs (struct ivopts_data *data)
1322 {
1323   gphi *phi;
1324   gimple *def;
1325   tree var;
1326   struct iv *iv, *incr_iv;
1327   struct loop *loop = data->current_loop;
1328   basic_block incr_bb;
1329   gphi_iterator psi;
1330
1331   data->bivs_not_used_in_addr = 0;
1332   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1333     {
1334       phi = psi.phi ();
1335
1336       iv = get_iv (data, PHI_RESULT (phi));
1337       if (!iv)
1338         continue;
1339
1340       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1341       def = SSA_NAME_DEF_STMT (var);
1342       /* Don't mark iv peeled from other one as biv.  */
1343       if (def
1344           && gimple_code (def) == GIMPLE_PHI
1345           && gimple_bb (def) == loop->header)
1346         continue;
1347
1348       incr_iv = get_iv (data, var);
1349       if (!incr_iv)
1350         continue;
1351
1352       /* If the increment is in the subloop, ignore it.  */
1353       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1354       if (incr_bb->loop_father != data->current_loop
1355           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1356         continue;
1357
1358       iv->biv_p = true;
1359       incr_iv->biv_p = true;
1360       if (iv->no_overflow)
1361         data->bivs_not_used_in_addr++;
1362       if (incr_iv->no_overflow)
1363         data->bivs_not_used_in_addr++;
1364     }
1365 }
1366
1367 /* Checks whether STMT defines a linear induction variable and stores its
1368    parameters to IV.  */
1369
1370 static bool
1371 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1372 {
1373   tree lhs, stop;
1374   struct loop *loop = data->current_loop;
1375
1376   iv->base = NULL_TREE;
1377   iv->step = NULL_TREE;
1378
1379   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1380     return false;
1381
1382   lhs = gimple_assign_lhs (stmt);
1383   if (TREE_CODE (lhs) != SSA_NAME)
1384     return false;
1385
1386   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1387     return false;
1388
1389   /* Stop expanding iv base at the first ssa var referred by iv step.
1390      Ideally we should stop at any ssa var, because that's expensive
1391      and unusual to happen, we just do it on the first one.
1392
1393      See PR64705 for the rationale.  */
1394   stop = extract_single_var_from_expr (iv->step);
1395   iv->base = expand_simple_operations (iv->base, stop);
1396   if (contains_abnormal_ssa_name_p (iv->base)
1397       || contains_abnormal_ssa_name_p (iv->step))
1398     return false;
1399
1400   /* If STMT could throw, then do not consider STMT as defining a GIV.
1401      While this will suppress optimizations, we can not safely delete this
1402      GIV and associated statements, even if it appears it is not used.  */
1403   if (stmt_could_throw_p (stmt))
1404     return false;
1405
1406   return true;
1407 }
1408
1409 /* Finds general ivs in statement STMT.  */
1410
1411 static void
1412 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1413 {
1414   affine_iv iv;
1415
1416   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1417     return;
1418
1419   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1420 }
1421
1422 /* Finds general ivs in basic block BB.  */
1423
1424 static void
1425 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1426 {
1427   gimple_stmt_iterator bsi;
1428
1429   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1430     find_givs_in_stmt (data, gsi_stmt (bsi));
1431 }
1432
1433 /* Finds general ivs.  */
1434
1435 static void
1436 find_givs (struct ivopts_data *data)
1437 {
1438   struct loop *loop = data->current_loop;
1439   basic_block *body = get_loop_body_in_dom_order (loop);
1440   unsigned i;
1441
1442   for (i = 0; i < loop->num_nodes; i++)
1443     find_givs_in_bb (data, body[i]);
1444   free (body);
1445 }
1446
1447 /* For each ssa name defined in LOOP determines whether it is an induction
1448    variable and if so, its initial value and step.  */
1449
1450 static bool
1451 find_induction_variables (struct ivopts_data *data)
1452 {
1453   unsigned i;
1454   bitmap_iterator bi;
1455
1456   if (!find_bivs (data))
1457     return false;
1458
1459   find_givs (data);
1460   mark_bivs (data);
1461
1462   if (dump_file && (dump_flags & TDF_DETAILS))
1463     {
1464       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1465
1466       if (niter)
1467         {
1468           fprintf (dump_file, "  number of iterations ");
1469           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1470           if (!integer_zerop (niter->may_be_zero))
1471             {
1472               fprintf (dump_file, "; zero if ");
1473               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1474             }
1475           fprintf (dump_file, "\n");
1476         };
1477
1478       fprintf (dump_file, "\n<Induction Vars>:\n");
1479       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1480         {
1481           struct version_info *info = ver_info (data, i);
1482           if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1483             dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1484         }
1485     }
1486
1487   return true;
1488 }
1489
1490 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1491    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1492    is the const offset stripped from IV base; for other types use, both
1493    are zero by default.  */
1494
1495 static struct iv_use *
1496 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1497             gimple *stmt, enum use_type type, tree addr_base,
1498             unsigned HOST_WIDE_INT addr_offset)
1499 {
1500   struct iv_use *use = XCNEW (struct iv_use);
1501
1502   use->id = group->vuses.length ();
1503   use->group_id = group->id;
1504   use->type = type;
1505   use->iv = iv;
1506   use->stmt = stmt;
1507   use->op_p = use_p;
1508   use->addr_base = addr_base;
1509   use->addr_offset = addr_offset;
1510
1511   group->vuses.safe_push (use);
1512   return use;
1513 }
1514
1515 /* Checks whether OP is a loop-level invariant and if so, records it.
1516    NONLINEAR_USE is true if the invariant is used in a way we do not
1517    handle specially.  */
1518
1519 static void
1520 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1521 {
1522   basic_block bb;
1523   struct version_info *info;
1524
1525   if (TREE_CODE (op) != SSA_NAME
1526       || virtual_operand_p (op))
1527     return;
1528
1529   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1530   if (bb
1531       && flow_bb_inside_loop_p (data->current_loop, bb))
1532     return;
1533
1534   info = name_info (data, op);
1535   info->name = op;
1536   info->has_nonlin_use |= nonlinear_use;
1537   if (!info->inv_id)
1538     info->inv_id = ++data->max_inv_id;
1539   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1540 }
1541
1542 static tree
1543 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1544
1545 /* Record a group of TYPE.  */
1546
1547 static struct iv_group *
1548 record_group (struct ivopts_data *data, enum use_type type)
1549 {
1550   struct iv_group *group = XCNEW (struct iv_group);
1551
1552   group->id = data->vgroups.length ();
1553   group->type = type;
1554   group->related_cands = BITMAP_ALLOC (NULL);
1555   group->vuses.create (1);
1556
1557   data->vgroups.safe_push (group);
1558   return group;
1559 }
1560
1561 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1562    New group will be created if there is no existing group for the use.  */
1563
1564 static struct iv_use *
1565 record_group_use (struct ivopts_data *data, tree *use_p,
1566                   struct iv *iv, gimple *stmt, enum use_type type)
1567 {
1568   tree addr_base = NULL;
1569   struct iv_group *group = NULL;
1570   unsigned HOST_WIDE_INT addr_offset = 0;
1571
1572   /* Record non address type use in a new group.  */
1573   if (type == USE_ADDRESS && iv->base_object)
1574     {
1575       unsigned int i;
1576
1577       addr_base = strip_offset (iv->base, &addr_offset);
1578       for (i = 0; i < data->vgroups.length (); i++)
1579         {
1580           struct iv_use *use;
1581
1582           group = data->vgroups[i];
1583           use = group->vuses[0];
1584           if (use->type != USE_ADDRESS || !use->iv->base_object)
1585             continue;
1586
1587           /* Check if it has the same stripped base and step.  */
1588           if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1589               && operand_equal_p (iv->step, use->iv->step, 0)
1590               && operand_equal_p (addr_base, use->addr_base, 0))
1591             break;
1592         }
1593       if (i == data->vgroups.length ())
1594         group = NULL;
1595     }
1596
1597   if (!group)
1598     group = record_group (data, type);
1599
1600   return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
1601 }
1602
1603 /* Checks whether the use OP is interesting and if so, records it.  */
1604
1605 static struct iv_use *
1606 find_interesting_uses_op (struct ivopts_data *data, tree op)
1607 {
1608   struct iv *iv;
1609   gimple *stmt;
1610   struct iv_use *use;
1611
1612   if (TREE_CODE (op) != SSA_NAME)
1613     return NULL;
1614
1615   iv = get_iv (data, op);
1616   if (!iv)
1617     return NULL;
1618
1619   if (iv->nonlin_use)
1620     {
1621       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1622       return iv->nonlin_use;
1623     }
1624
1625   if (integer_zerop (iv->step))
1626     {
1627       record_invariant (data, op, true);
1628       return NULL;
1629     }
1630
1631   stmt = SSA_NAME_DEF_STMT (op);
1632   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1633
1634   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1635   iv->nonlin_use = use;
1636   return use;
1637 }
1638
1639 /* Given a condition in statement STMT, checks whether it is a compare
1640    of an induction variable and an invariant.  If this is the case,
1641    CONTROL_VAR is set to location of the iv, BOUND to the location of
1642    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1643    induction variable descriptions, and true is returned.  If this is not
1644    the case, CONTROL_VAR and BOUND are set to the arguments of the
1645    condition and false is returned.  */
1646
1647 static bool
1648 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1649                        tree **control_var, tree **bound,
1650                        struct iv **iv_var, struct iv **iv_bound)
1651 {
1652   /* The objects returned when COND has constant operands.  */
1653   static struct iv const_iv;
1654   static tree zero;
1655   tree *op0 = &zero, *op1 = &zero;
1656   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1657   bool ret = false;
1658
1659   if (gimple_code (stmt) == GIMPLE_COND)
1660     {
1661       gcond *cond_stmt = as_a <gcond *> (stmt);
1662       op0 = gimple_cond_lhs_ptr (cond_stmt);
1663       op1 = gimple_cond_rhs_ptr (cond_stmt);
1664     }
1665   else
1666     {
1667       op0 = gimple_assign_rhs1_ptr (stmt);
1668       op1 = gimple_assign_rhs2_ptr (stmt);
1669     }
1670
1671   zero = integer_zero_node;
1672   const_iv.step = integer_zero_node;
1673
1674   if (TREE_CODE (*op0) == SSA_NAME)
1675     iv0 = get_iv (data, *op0);
1676   if (TREE_CODE (*op1) == SSA_NAME)
1677     iv1 = get_iv (data, *op1);
1678
1679   /* Exactly one of the compared values must be an iv, and the other one must
1680      be an invariant.  */
1681   if (!iv0 || !iv1)
1682     goto end;
1683
1684   if (integer_zerop (iv0->step))
1685     {
1686       /* Control variable may be on the other side.  */
1687       std::swap (op0, op1);
1688       std::swap (iv0, iv1);
1689     }
1690   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1691
1692 end:
1693   if (control_var)
1694     *control_var = op0;
1695   if (iv_var)
1696     *iv_var = iv0;
1697   if (bound)
1698     *bound = op1;
1699   if (iv_bound)
1700     *iv_bound = iv1;
1701
1702   return ret;
1703 }
1704
1705 /* Checks whether the condition in STMT is interesting and if so,
1706    records it.  */
1707
1708 static void
1709 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1710 {
1711   tree *var_p, *bound_p;
1712   struct iv *var_iv;
1713
1714   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1715     {
1716       find_interesting_uses_op (data, *var_p);
1717       find_interesting_uses_op (data, *bound_p);
1718       return;
1719     }
1720
1721   record_group_use (data, NULL, var_iv, stmt, USE_COMPARE);
1722 }
1723
1724 /* Returns the outermost loop EXPR is obviously invariant in
1725    relative to the loop LOOP, i.e. if all its operands are defined
1726    outside of the returned loop.  Returns NULL if EXPR is not
1727    even obviously invariant in LOOP.  */
1728
1729 struct loop *
1730 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1731 {
1732   basic_block def_bb;
1733   unsigned i, len;
1734
1735   if (is_gimple_min_invariant (expr))
1736     return current_loops->tree_root;
1737
1738   if (TREE_CODE (expr) == SSA_NAME)
1739     {
1740       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1741       if (def_bb)
1742         {
1743           if (flow_bb_inside_loop_p (loop, def_bb))
1744             return NULL;
1745           return superloop_at_depth (loop,
1746                                      loop_depth (def_bb->loop_father) + 1);
1747         }
1748
1749       return current_loops->tree_root;
1750     }
1751
1752   if (!EXPR_P (expr))
1753     return NULL;
1754
1755   unsigned maxdepth = 0;
1756   len = TREE_OPERAND_LENGTH (expr);
1757   for (i = 0; i < len; i++)
1758     {
1759       struct loop *ivloop;
1760       if (!TREE_OPERAND (expr, i))
1761         continue;
1762
1763       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1764       if (!ivloop)
1765         return NULL;
1766       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1767     }
1768
1769   return superloop_at_depth (loop, maxdepth);
1770 }
1771
1772 /* Returns true if expression EXPR is obviously invariant in LOOP,
1773    i.e. if all its operands are defined outside of the LOOP.  LOOP
1774    should not be the function body.  */
1775
1776 bool
1777 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1778 {
1779   basic_block def_bb;
1780   unsigned i, len;
1781
1782   gcc_assert (loop_depth (loop) > 0);
1783
1784   if (is_gimple_min_invariant (expr))
1785     return true;
1786
1787   if (TREE_CODE (expr) == SSA_NAME)
1788     {
1789       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1790       if (def_bb
1791           && flow_bb_inside_loop_p (loop, def_bb))
1792         return false;
1793
1794       return true;
1795     }
1796
1797   if (!EXPR_P (expr))
1798     return false;
1799
1800   len = TREE_OPERAND_LENGTH (expr);
1801   for (i = 0; i < len; i++)
1802     if (TREE_OPERAND (expr, i)
1803         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1804       return false;
1805
1806   return true;
1807 }
1808
1809 /* Given expression EXPR which computes inductive values with respect
1810    to loop recorded in DATA, this function returns biv from which EXPR
1811    is derived by tracing definition chains of ssa variables in EXPR.  */
1812
1813 static struct iv*
1814 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1815 {
1816   struct iv *iv;
1817   unsigned i, n;
1818   tree e2, e1;
1819   enum tree_code code;
1820   gimple *stmt;
1821
1822   if (expr == NULL_TREE)
1823     return NULL;
1824
1825   if (is_gimple_min_invariant (expr))
1826     return NULL;
1827
1828   code = TREE_CODE (expr);
1829   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1830     {
1831       n = TREE_OPERAND_LENGTH (expr);
1832       for (i = 0; i < n; i++)
1833         {
1834           iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1835           if (iv)
1836             return iv;
1837         }
1838     }
1839
1840   /* Stop if it's not ssa name.  */
1841   if (code != SSA_NAME)
1842     return NULL;
1843
1844   iv = get_iv (data, expr);
1845   if (!iv || integer_zerop (iv->step))
1846     return NULL;
1847   else if (iv->biv_p)
1848     return iv;
1849
1850   stmt = SSA_NAME_DEF_STMT (expr);
1851   if (gphi *phi = dyn_cast <gphi *> (stmt))
1852     {
1853       ssa_op_iter iter;
1854       use_operand_p use_p;
1855
1856       if (virtual_operand_p (gimple_phi_result (phi)))
1857         return NULL;
1858
1859       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1860         {
1861           tree use = USE_FROM_PTR (use_p);
1862           iv = find_deriving_biv_for_expr (data, use);
1863           if (iv)
1864             return iv;
1865         }
1866       return NULL;
1867     }
1868   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1869     return NULL;
1870
1871   e1 = gimple_assign_rhs1 (stmt);
1872   code = gimple_assign_rhs_code (stmt);
1873   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1874     return find_deriving_biv_for_expr (data, e1);
1875
1876   switch (code)
1877     {
1878     case MULT_EXPR:
1879     case PLUS_EXPR:
1880     case MINUS_EXPR:
1881     case POINTER_PLUS_EXPR:
1882       /* Increments, decrements and multiplications by a constant
1883          are simple.  */
1884       e2 = gimple_assign_rhs2 (stmt);
1885       iv = find_deriving_biv_for_expr (data, e2);
1886       if (iv)
1887         return iv;
1888       gcc_fallthrough ();
1889
1890     CASE_CONVERT:
1891       /* Casts are simple.  */
1892       return find_deriving_biv_for_expr (data, e1);
1893
1894     default:
1895       break;
1896     }
1897
1898   return NULL;
1899 }
1900
1901 /* Record BIV, its predecessor and successor that they are used in
1902    address type uses.  */
1903
1904 static void
1905 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1906 {
1907   unsigned i;
1908   tree type, base_1, base_2;
1909   bitmap_iterator bi;
1910
1911   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1912       || biv->have_address_use || !biv->no_overflow)
1913     return;
1914
1915   type = TREE_TYPE (biv->base);
1916   if (!INTEGRAL_TYPE_P (type))
1917     return;
1918
1919   biv->have_address_use = true;
1920   data->bivs_not_used_in_addr--;
1921   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1922   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1923     {
1924       struct iv *iv = ver_info (data, i)->iv;
1925
1926       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1927           || iv->have_address_use || !iv->no_overflow)
1928         continue;
1929
1930       if (type != TREE_TYPE (iv->base)
1931           || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1932         continue;
1933
1934       if (!operand_equal_p (biv->step, iv->step, 0))
1935         continue;
1936
1937       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1938       if (operand_equal_p (base_1, iv->base, 0)
1939           || operand_equal_p (base_2, biv->base, 0))
1940         {
1941           iv->have_address_use = true;
1942           data->bivs_not_used_in_addr--;
1943         }
1944     }
1945 }
1946
1947 /* Cumulates the steps of indices into DATA and replaces their values with the
1948    initial ones.  Returns false when the value of the index cannot be determined.
1949    Callback for for_each_index.  */
1950
1951 struct ifs_ivopts_data
1952 {
1953   struct ivopts_data *ivopts_data;
1954   gimple *stmt;
1955   tree step;
1956 };
1957
1958 static bool
1959 idx_find_step (tree base, tree *idx, void *data)
1960 {
1961   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1962   struct iv *iv;
1963   bool use_overflow_semantics = false;
1964   tree step, iv_base, iv_step, lbound, off;
1965   struct loop *loop = dta->ivopts_data->current_loop;
1966
1967   /* If base is a component ref, require that the offset of the reference
1968      be invariant.  */
1969   if (TREE_CODE (base) == COMPONENT_REF)
1970     {
1971       off = component_ref_field_offset (base);
1972       return expr_invariant_in_loop_p (loop, off);
1973     }
1974
1975   /* If base is array, first check whether we will be able to move the
1976      reference out of the loop (in order to take its address in strength
1977      reduction).  In order for this to work we need both lower bound
1978      and step to be loop invariants.  */
1979   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1980     {
1981       /* Moreover, for a range, the size needs to be invariant as well.  */
1982       if (TREE_CODE (base) == ARRAY_RANGE_REF
1983           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1984         return false;
1985
1986       step = array_ref_element_size (base);
1987       lbound = array_ref_low_bound (base);
1988
1989       if (!expr_invariant_in_loop_p (loop, step)
1990           || !expr_invariant_in_loop_p (loop, lbound))
1991         return false;
1992     }
1993
1994   if (TREE_CODE (*idx) != SSA_NAME)
1995     return true;
1996
1997   iv = get_iv (dta->ivopts_data, *idx);
1998   if (!iv)
1999     return false;
2000
2001   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2002           *&x[0], which is not folded and does not trigger the
2003           ARRAY_REF path below.  */
2004   *idx = iv->base;
2005
2006   if (integer_zerop (iv->step))
2007     return true;
2008
2009   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2010     {
2011       step = array_ref_element_size (base);
2012
2013       /* We only handle addresses whose step is an integer constant.  */
2014       if (TREE_CODE (step) != INTEGER_CST)
2015         return false;
2016     }
2017   else
2018     /* The step for pointer arithmetics already is 1 byte.  */
2019     step = size_one_node;
2020
2021   iv_base = iv->base;
2022   iv_step = iv->step;
2023   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2024     use_overflow_semantics = true;
2025
2026   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2027                             sizetype, &iv_base, &iv_step, dta->stmt,
2028                             use_overflow_semantics))
2029     {
2030       /* The index might wrap.  */
2031       return false;
2032     }
2033
2034   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2035   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2036
2037   if (dta->ivopts_data->bivs_not_used_in_addr)
2038     {
2039       if (!iv->biv_p)
2040         iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2041
2042       record_biv_for_address_use (dta->ivopts_data, iv);
2043     }
2044   return true;
2045 }
2046
2047 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2048    object is passed to it in DATA.  */
2049
2050 static bool
2051 idx_record_use (tree base, tree *idx,
2052                 void *vdata)
2053 {
2054   struct ivopts_data *data = (struct ivopts_data *) vdata;
2055   find_interesting_uses_op (data, *idx);
2056   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2057     {
2058       find_interesting_uses_op (data, array_ref_element_size (base));
2059       find_interesting_uses_op (data, array_ref_low_bound (base));
2060     }
2061   return true;
2062 }
2063
2064 /* If we can prove that TOP = cst * BOT for some constant cst,
2065    store cst to MUL and return true.  Otherwise return false.
2066    The returned value is always sign-extended, regardless of the
2067    signedness of TOP and BOT.  */
2068
2069 static bool
2070 constant_multiple_of (tree top, tree bot, widest_int *mul)
2071 {
2072   tree mby;
2073   enum tree_code code;
2074   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2075   widest_int res, p0, p1;
2076
2077   STRIP_NOPS (top);
2078   STRIP_NOPS (bot);
2079
2080   if (operand_equal_p (top, bot, 0))
2081     {
2082       *mul = 1;
2083       return true;
2084     }
2085
2086   code = TREE_CODE (top);
2087   switch (code)
2088     {
2089     case MULT_EXPR:
2090       mby = TREE_OPERAND (top, 1);
2091       if (TREE_CODE (mby) != INTEGER_CST)
2092         return false;
2093
2094       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2095         return false;
2096
2097       *mul = wi::sext (res * wi::to_widest (mby), precision);
2098       return true;
2099
2100     case PLUS_EXPR:
2101     case MINUS_EXPR:
2102       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2103           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2104         return false;
2105
2106       if (code == MINUS_EXPR)
2107         p1 = -p1;
2108       *mul = wi::sext (p0 + p1, precision);
2109       return true;
2110
2111     case INTEGER_CST:
2112       if (TREE_CODE (bot) != INTEGER_CST)
2113         return false;
2114
2115       p0 = widest_int::from (top, SIGNED);
2116       p1 = widest_int::from (bot, SIGNED);
2117       if (p1 == 0)
2118         return false;
2119       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2120       return res == 0;
2121
2122     default:
2123       return false;
2124     }
2125 }
2126
2127 /* Return true if memory reference REF with step STEP may be unaligned.  */
2128
2129 static bool
2130 may_be_unaligned_p (tree ref, tree step)
2131 {
2132   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2133      thus they are not misaligned.  */
2134   if (TREE_CODE (ref) == TARGET_MEM_REF)
2135     return false;
2136
2137   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2138   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2139     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2140
2141   unsigned HOST_WIDE_INT bitpos;
2142   unsigned int ref_align;
2143   get_object_alignment_1 (ref, &ref_align, &bitpos);
2144   if (ref_align < align
2145       || (bitpos % align) != 0
2146       || (bitpos % BITS_PER_UNIT) != 0)
2147     return true;
2148
2149   unsigned int trailing_zeros = tree_ctz (step);
2150   if (trailing_zeros < HOST_BITS_PER_INT
2151       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2152     return true;
2153
2154   return false;
2155 }
2156
2157 /* Return true if EXPR may be non-addressable.   */
2158
2159 bool
2160 may_be_nonaddressable_p (tree expr)
2161 {
2162   switch (TREE_CODE (expr))
2163     {
2164     case TARGET_MEM_REF:
2165       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2166          target, thus they are always addressable.  */
2167       return false;
2168
2169     case MEM_REF:
2170       /* Likewise for MEM_REFs, modulo the storage order.  */
2171       return REF_REVERSE_STORAGE_ORDER (expr);
2172
2173     case BIT_FIELD_REF:
2174       if (REF_REVERSE_STORAGE_ORDER (expr))
2175         return true;
2176       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2177
2178     case COMPONENT_REF:
2179       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2180         return true;
2181       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2182              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2183
2184     case ARRAY_REF:
2185     case ARRAY_RANGE_REF:
2186       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2187         return true;
2188       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2189
2190     case VIEW_CONVERT_EXPR:
2191       /* This kind of view-conversions may wrap non-addressable objects
2192          and make them look addressable.  After some processing the
2193          non-addressability may be uncovered again, causing ADDR_EXPRs
2194          of inappropriate objects to be built.  */
2195       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2196           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2197         return true;
2198       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2199
2200     CASE_CONVERT:
2201       return true;
2202
2203     default:
2204       break;
2205     }
2206
2207   return false;
2208 }
2209
2210 /* Finds addresses in *OP_P inside STMT.  */
2211
2212 static void
2213 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2214                                tree *op_p)
2215 {
2216   tree base = *op_p, step = size_zero_node;
2217   struct iv *civ;
2218   struct ifs_ivopts_data ifs_ivopts_data;
2219
2220   /* Do not play with volatile memory references.  A bit too conservative,
2221      perhaps, but safe.  */
2222   if (gimple_has_volatile_ops (stmt))
2223     goto fail;
2224
2225   /* Ignore bitfields for now.  Not really something terribly complicated
2226      to handle.  TODO.  */
2227   if (TREE_CODE (base) == BIT_FIELD_REF)
2228     goto fail;
2229
2230   base = unshare_expr (base);
2231
2232   if (TREE_CODE (base) == TARGET_MEM_REF)
2233     {
2234       tree type = build_pointer_type (TREE_TYPE (base));
2235       tree astep;
2236
2237       if (TMR_BASE (base)
2238           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2239         {
2240           civ = get_iv (data, TMR_BASE (base));
2241           if (!civ)
2242             goto fail;
2243
2244           TMR_BASE (base) = civ->base;
2245           step = civ->step;
2246         }
2247       if (TMR_INDEX2 (base)
2248           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2249         {
2250           civ = get_iv (data, TMR_INDEX2 (base));
2251           if (!civ)
2252             goto fail;
2253
2254           TMR_INDEX2 (base) = civ->base;
2255           step = civ->step;
2256         }
2257       if (TMR_INDEX (base)
2258           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2259         {
2260           civ = get_iv (data, TMR_INDEX (base));
2261           if (!civ)
2262             goto fail;
2263
2264           TMR_INDEX (base) = civ->base;
2265           astep = civ->step;
2266
2267           if (astep)
2268             {
2269               if (TMR_STEP (base))
2270                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2271
2272               step = fold_build2 (PLUS_EXPR, type, step, astep);
2273             }
2274         }
2275
2276       if (integer_zerop (step))
2277         goto fail;
2278       base = tree_mem_ref_addr (type, base);
2279     }
2280   else
2281     {
2282       ifs_ivopts_data.ivopts_data = data;
2283       ifs_ivopts_data.stmt = stmt;
2284       ifs_ivopts_data.step = size_zero_node;
2285       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2286           || integer_zerop (ifs_ivopts_data.step))
2287         goto fail;
2288       step = ifs_ivopts_data.step;
2289
2290       /* Check that the base expression is addressable.  This needs
2291          to be done after substituting bases of IVs into it.  */
2292       if (may_be_nonaddressable_p (base))
2293         goto fail;
2294
2295       /* Moreover, on strict alignment platforms, check that it is
2296          sufficiently aligned.  */
2297       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2298         goto fail;
2299
2300       base = build_fold_addr_expr (base);
2301
2302       /* Substituting bases of IVs into the base expression might
2303          have caused folding opportunities.  */
2304       if (TREE_CODE (base) == ADDR_EXPR)
2305         {
2306           tree *ref = &TREE_OPERAND (base, 0);
2307           while (handled_component_p (*ref))
2308             ref = &TREE_OPERAND (*ref, 0);
2309           if (TREE_CODE (*ref) == MEM_REF)
2310             {
2311               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2312                                       TREE_OPERAND (*ref, 0),
2313                                       TREE_OPERAND (*ref, 1));
2314               if (tem)
2315                 *ref = tem;
2316             }
2317         }
2318     }
2319
2320   civ = alloc_iv (data, base, step);
2321   record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2322   return;
2323
2324 fail:
2325   for_each_index (op_p, idx_record_use, data);
2326 }
2327
2328 /* Finds and records invariants used in STMT.  */
2329
2330 static void
2331 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2332 {
2333   ssa_op_iter iter;
2334   use_operand_p use_p;
2335   tree op;
2336
2337   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2338     {
2339       op = USE_FROM_PTR (use_p);
2340       record_invariant (data, op, false);
2341     }
2342 }
2343
2344 /* Finds interesting uses of induction variables in the statement STMT.  */
2345
2346 static void
2347 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2348 {
2349   struct iv *iv;
2350   tree op, *lhs, *rhs;
2351   ssa_op_iter iter;
2352   use_operand_p use_p;
2353   enum tree_code code;
2354
2355   find_invariants_stmt (data, stmt);
2356
2357   if (gimple_code (stmt) == GIMPLE_COND)
2358     {
2359       find_interesting_uses_cond (data, stmt);
2360       return;
2361     }
2362
2363   if (is_gimple_assign (stmt))
2364     {
2365       lhs = gimple_assign_lhs_ptr (stmt);
2366       rhs = gimple_assign_rhs1_ptr (stmt);
2367
2368       if (TREE_CODE (*lhs) == SSA_NAME)
2369         {
2370           /* If the statement defines an induction variable, the uses are not
2371              interesting by themselves.  */
2372
2373           iv = get_iv (data, *lhs);
2374
2375           if (iv && !integer_zerop (iv->step))
2376             return;
2377         }
2378
2379       code = gimple_assign_rhs_code (stmt);
2380       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2381           && (REFERENCE_CLASS_P (*rhs)
2382               || is_gimple_val (*rhs)))
2383         {
2384           if (REFERENCE_CLASS_P (*rhs))
2385             find_interesting_uses_address (data, stmt, rhs);
2386           else
2387             find_interesting_uses_op (data, *rhs);
2388
2389           if (REFERENCE_CLASS_P (*lhs))
2390             find_interesting_uses_address (data, stmt, lhs);
2391           return;
2392         }
2393       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2394         {
2395           find_interesting_uses_cond (data, stmt);
2396           return;
2397         }
2398
2399       /* TODO -- we should also handle address uses of type
2400
2401          memory = call (whatever);
2402
2403          and
2404
2405          call (memory).  */
2406     }
2407
2408   if (gimple_code (stmt) == GIMPLE_PHI
2409       && gimple_bb (stmt) == data->current_loop->header)
2410     {
2411       iv = get_iv (data, PHI_RESULT (stmt));
2412
2413       if (iv && !integer_zerop (iv->step))
2414         return;
2415     }
2416
2417   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2418     {
2419       op = USE_FROM_PTR (use_p);
2420
2421       if (TREE_CODE (op) != SSA_NAME)
2422         continue;
2423
2424       iv = get_iv (data, op);
2425       if (!iv)
2426         continue;
2427
2428       find_interesting_uses_op (data, op);
2429     }
2430 }
2431
2432 /* Finds interesting uses of induction variables outside of loops
2433    on loop exit edge EXIT.  */
2434
2435 static void
2436 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2437 {
2438   gphi *phi;
2439   gphi_iterator psi;
2440   tree def;
2441
2442   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2443     {
2444       phi = psi.phi ();
2445       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2446       if (!virtual_operand_p (def))
2447         find_interesting_uses_op (data, def);
2448     }
2449 }
2450
2451 /* Compute maximum offset of [base + offset] addressing mode
2452    for memory reference represented by USE.  */
2453
2454 static HOST_WIDE_INT
2455 compute_max_addr_offset (struct iv_use *use)
2456 {
2457   int width;
2458   rtx reg, addr;
2459   HOST_WIDE_INT i, off;
2460   unsigned list_index, num;
2461   addr_space_t as;
2462   machine_mode mem_mode, addr_mode;
2463   static vec<HOST_WIDE_INT> max_offset_list;
2464
2465   as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2466   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2467
2468   num = max_offset_list.length ();
2469   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2470   if (list_index >= num)
2471     {
2472       max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2473       for (; num < max_offset_list.length (); num++)
2474         max_offset_list[num] = -1;
2475     }
2476
2477   off = max_offset_list[list_index];
2478   if (off != -1)
2479     return off;
2480
2481   addr_mode = targetm.addr_space.address_mode (as);
2482   reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2483   addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2484
2485   width = GET_MODE_BITSIZE (addr_mode) - 1;
2486   if (width > (HOST_BITS_PER_WIDE_INT - 1))
2487     width = HOST_BITS_PER_WIDE_INT - 1;
2488
2489   for (i = width; i > 0; i--)
2490     {
2491       off = (HOST_WIDE_INT_1U << i) - 1;
2492       XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2493       if (memory_address_addr_space_p (mem_mode, addr, as))
2494         break;
2495
2496       /* For some strict-alignment targets, the offset must be naturally
2497          aligned.  Try an aligned offset if mem_mode is not QImode.  */
2498       off = (HOST_WIDE_INT_1U << i);
2499       if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2500         {
2501           off -= GET_MODE_SIZE (mem_mode);
2502           XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2503           if (memory_address_addr_space_p (mem_mode, addr, as))
2504             break;
2505         }
2506     }
2507   if (i == 0)
2508     off = 0;
2509
2510   max_offset_list[list_index] = off;
2511   return off;
2512 }
2513
2514 /* Comparison function to sort group in ascending order of addr_offset.  */
2515
2516 static int
2517 group_compare_offset (const void *a, const void *b)
2518 {
2519   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2520   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2521
2522   if ((*u1)->addr_offset != (*u2)->addr_offset)
2523     return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2524   else
2525     return 0;
2526 }
2527
2528 /* Check if small groups should be split.  Return true if no group
2529    contains more than two uses with distinct addr_offsets.  Return
2530    false otherwise.  We want to split such groups because:
2531
2532      1) Small groups don't have much benefit and may interfer with
2533         general candidate selection.
2534      2) Size for problem with only small groups is usually small and
2535         general algorithm can handle it well.
2536
2537    TODO -- Above claim may not hold when we want to merge memory
2538    accesses with conseuctive addresses.  */
2539
2540 static bool
2541 split_small_address_groups_p (struct ivopts_data *data)
2542 {
2543   unsigned int i, j, distinct = 1;
2544   struct iv_use *pre;
2545   struct iv_group *group;
2546
2547   for (i = 0; i < data->vgroups.length (); i++)
2548     {
2549       group = data->vgroups[i];
2550       if (group->vuses.length () == 1)
2551         continue;
2552
2553       gcc_assert (group->type == USE_ADDRESS);
2554       if (group->vuses.length () == 2)
2555         {
2556           if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2557             std::swap (group->vuses[0], group->vuses[1]);
2558         }
2559       else
2560         group->vuses.qsort (group_compare_offset);
2561
2562       if (distinct > 2)
2563         continue;
2564
2565       distinct = 1;
2566       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2567         {
2568           if (group->vuses[j]->addr_offset != pre->addr_offset)
2569             {
2570               pre = group->vuses[j];
2571               distinct++;
2572             }
2573
2574           if (distinct > 2)
2575             break;
2576         }
2577     }
2578
2579   return (distinct <= 2);
2580 }
2581
2582 /* For each group of address type uses, this function further groups
2583    these uses according to the maximum offset supported by target's
2584    [base + offset] addressing mode.  */
2585
2586 static void
2587 split_address_groups (struct ivopts_data *data)
2588 {
2589   unsigned int i, j;
2590   HOST_WIDE_INT max_offset = -1;
2591
2592   /* Reset max offset to split all small groups.  */
2593   if (split_small_address_groups_p (data))
2594     max_offset = 0;
2595
2596   for (i = 0; i < data->vgroups.length (); i++)
2597     {
2598       struct iv_group *group = data->vgroups[i];
2599       struct iv_use *use = group->vuses[0];
2600
2601       use->id = 0;
2602       use->group_id = group->id;
2603       if (group->vuses.length () == 1)
2604         continue;
2605
2606       if (max_offset != 0)
2607         max_offset = compute_max_addr_offset (use);
2608
2609       for (j = 1; j < group->vuses.length (); j++)
2610         {
2611           struct iv_use *next = group->vuses[j];
2612
2613           /* Only uses with offset that can fit in offset part against
2614              the first use can be grouped together.  */
2615           if (next->addr_offset - use->addr_offset
2616               > (unsigned HOST_WIDE_INT) max_offset)
2617             break;
2618
2619           next->id = j;
2620           next->group_id = group->id;
2621         }
2622       /* Split group.  */
2623       if (j < group->vuses.length ())
2624         {
2625           struct iv_group *new_group = record_group (data, group->type);
2626           new_group->vuses.safe_splice (group->vuses);
2627           new_group->vuses.block_remove (0, j);
2628           group->vuses.truncate (j);
2629         }
2630     }
2631 }
2632
2633 /* Finds uses of the induction variables that are interesting.  */
2634
2635 static void
2636 find_interesting_uses (struct ivopts_data *data)
2637 {
2638   basic_block bb;
2639   gimple_stmt_iterator bsi;
2640   basic_block *body = get_loop_body (data->current_loop);
2641   unsigned i;
2642   edge e;
2643
2644   for (i = 0; i < data->current_loop->num_nodes; i++)
2645     {
2646       edge_iterator ei;
2647       bb = body[i];
2648
2649       FOR_EACH_EDGE (e, ei, bb->succs)
2650         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2651             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2652           find_interesting_uses_outside (data, e);
2653
2654       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2655         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2656       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2657         if (!is_gimple_debug (gsi_stmt (bsi)))
2658           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2659     }
2660
2661   split_address_groups (data);
2662
2663   if (dump_file && (dump_flags & TDF_DETAILS))
2664     {
2665       bitmap_iterator bi;
2666
2667       fprintf (dump_file, "\n<Invariant Vars>:\n");
2668       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2669         {
2670           struct version_info *info = ver_info (data, i);
2671           if (info->inv_id)
2672             {
2673               fprintf (dump_file, "Inv %d:\t", info->inv_id);
2674               print_generic_expr (dump_file, info->name, TDF_SLIM);
2675               fprintf (dump_file, "%s\n",
2676                        info->has_nonlin_use ? "" : "\t(eliminable)");
2677             }
2678         }
2679
2680       fprintf (dump_file, "\n<IV Groups>:\n");
2681       dump_groups (dump_file, data);
2682       fprintf (dump_file, "\n");
2683     }
2684
2685   free (body);
2686 }
2687
2688 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2689    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2690    we are at the top-level of the processed address.  */
2691
2692 static tree
2693 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2694                 HOST_WIDE_INT *offset)
2695 {
2696   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2697   enum tree_code code;
2698   tree type, orig_type = TREE_TYPE (expr);
2699   HOST_WIDE_INT off0, off1, st;
2700   tree orig_expr = expr;
2701
2702   STRIP_NOPS (expr);
2703
2704   type = TREE_TYPE (expr);
2705   code = TREE_CODE (expr);
2706   *offset = 0;
2707
2708   switch (code)
2709     {
2710     case INTEGER_CST:
2711       if (!cst_and_fits_in_hwi (expr)
2712           || integer_zerop (expr))
2713         return orig_expr;
2714
2715       *offset = int_cst_value (expr);
2716       return build_int_cst (orig_type, 0);
2717
2718     case POINTER_PLUS_EXPR:
2719     case PLUS_EXPR:
2720     case MINUS_EXPR:
2721       op0 = TREE_OPERAND (expr, 0);
2722       op1 = TREE_OPERAND (expr, 1);
2723
2724       op0 = strip_offset_1 (op0, false, false, &off0);
2725       op1 = strip_offset_1 (op1, false, false, &off1);
2726
2727       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2728       if (op0 == TREE_OPERAND (expr, 0)
2729           && op1 == TREE_OPERAND (expr, 1))
2730         return orig_expr;
2731
2732       if (integer_zerop (op1))
2733         expr = op0;
2734       else if (integer_zerop (op0))
2735         {
2736           if (code == MINUS_EXPR)
2737             expr = fold_build1 (NEGATE_EXPR, type, op1);
2738           else
2739             expr = op1;
2740         }
2741       else
2742         expr = fold_build2 (code, type, op0, op1);
2743
2744       return fold_convert (orig_type, expr);
2745
2746     case MULT_EXPR:
2747       op1 = TREE_OPERAND (expr, 1);
2748       if (!cst_and_fits_in_hwi (op1))
2749         return orig_expr;
2750
2751       op0 = TREE_OPERAND (expr, 0);
2752       op0 = strip_offset_1 (op0, false, false, &off0);
2753       if (op0 == TREE_OPERAND (expr, 0))
2754         return orig_expr;
2755
2756       *offset = off0 * int_cst_value (op1);
2757       if (integer_zerop (op0))
2758         expr = op0;
2759       else
2760         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2761
2762       return fold_convert (orig_type, expr);
2763
2764     case ARRAY_REF:
2765     case ARRAY_RANGE_REF:
2766       if (!inside_addr)
2767         return orig_expr;
2768
2769       step = array_ref_element_size (expr);
2770       if (!cst_and_fits_in_hwi (step))
2771         break;
2772
2773       st = int_cst_value (step);
2774       op1 = TREE_OPERAND (expr, 1);
2775       op1 = strip_offset_1 (op1, false, false, &off1);
2776       *offset = off1 * st;
2777
2778       if (top_compref
2779           && integer_zerop (op1))
2780         {
2781           /* Strip the component reference completely.  */
2782           op0 = TREE_OPERAND (expr, 0);
2783           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2784           *offset += off0;
2785           return op0;
2786         }
2787       break;
2788
2789     case COMPONENT_REF:
2790       {
2791         tree field;
2792
2793         if (!inside_addr)
2794           return orig_expr;
2795
2796         tmp = component_ref_field_offset (expr);
2797         field = TREE_OPERAND (expr, 1);
2798         if (top_compref
2799             && cst_and_fits_in_hwi (tmp)
2800             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2801           {
2802             HOST_WIDE_INT boffset, abs_off;
2803
2804             /* Strip the component reference completely.  */
2805             op0 = TREE_OPERAND (expr, 0);
2806             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2807             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2808             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2809             if (boffset < 0)
2810               abs_off = -abs_off;
2811
2812             *offset = off0 + int_cst_value (tmp) + abs_off;
2813             return op0;
2814           }
2815       }
2816       break;
2817
2818     case ADDR_EXPR:
2819       op0 = TREE_OPERAND (expr, 0);
2820       op0 = strip_offset_1 (op0, true, true, &off0);
2821       *offset += off0;
2822
2823       if (op0 == TREE_OPERAND (expr, 0))
2824         return orig_expr;
2825
2826       expr = build_fold_addr_expr (op0);
2827       return fold_convert (orig_type, expr);
2828
2829     case MEM_REF:
2830       /* ???  Offset operand?  */
2831       inside_addr = false;
2832       break;
2833
2834     default:
2835       return orig_expr;
2836     }
2837
2838   /* Default handling of expressions for that we want to recurse into
2839      the first operand.  */
2840   op0 = TREE_OPERAND (expr, 0);
2841   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2842   *offset += off0;
2843
2844   if (op0 == TREE_OPERAND (expr, 0)
2845       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2846     return orig_expr;
2847
2848   expr = copy_node (expr);
2849   TREE_OPERAND (expr, 0) = op0;
2850   if (op1)
2851     TREE_OPERAND (expr, 1) = op1;
2852
2853   /* Inside address, we might strip the top level component references,
2854      thus changing type of the expression.  Handling of ADDR_EXPR
2855      will fix that.  */
2856   expr = fold_convert (orig_type, expr);
2857
2858   return expr;
2859 }
2860
2861 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2862
2863 static tree
2864 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2865 {
2866   HOST_WIDE_INT off;
2867   tree core = strip_offset_1 (expr, false, false, &off);
2868   *offset = off;
2869   return core;
2870 }
2871
2872 /* Returns variant of TYPE that can be used as base for different uses.
2873    We return unsigned type with the same precision, which avoids problems
2874    with overflows.  */
2875
2876 static tree
2877 generic_type_for (tree type)
2878 {
2879   if (POINTER_TYPE_P (type))
2880     return unsigned_type_for (type);
2881
2882   if (TYPE_UNSIGNED (type))
2883     return type;
2884
2885   return unsigned_type_for (type);
2886 }
2887
2888 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2889    the bitmap to that we should store it.  */
2890
2891 static struct ivopts_data *fd_ivopts_data;
2892 static tree
2893 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2894 {
2895   bitmap *depends_on = (bitmap *) data;
2896   struct version_info *info;
2897
2898   if (TREE_CODE (*expr_p) != SSA_NAME)
2899     return NULL_TREE;
2900   info = name_info (fd_ivopts_data, *expr_p);
2901
2902   if (!info->inv_id || info->has_nonlin_use)
2903     return NULL_TREE;
2904
2905   if (!*depends_on)
2906     *depends_on = BITMAP_ALLOC (NULL);
2907   bitmap_set_bit (*depends_on, info->inv_id);
2908
2909   return NULL_TREE;
2910 }
2911
2912 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2913    position to POS.  If USE is not NULL, the candidate is set as related to
2914    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2915    replacement of the final value of the iv by a direct computation.  */
2916
2917 static struct iv_cand *
2918 add_candidate_1 (struct ivopts_data *data,
2919                  tree base, tree step, bool important, enum iv_position pos,
2920                  struct iv_use *use, gimple *incremented_at,
2921                  struct iv *orig_iv = NULL)
2922 {
2923   unsigned i;
2924   struct iv_cand *cand = NULL;
2925   tree type, orig_type;
2926
2927   gcc_assert (base && step);
2928
2929   /* -fkeep-gc-roots-live means that we have to keep a real pointer
2930      live, but the ivopts code may replace a real pointer with one
2931      pointing before or after the memory block that is then adjusted
2932      into the memory block during the loop.  FIXME: It would likely be
2933      better to actually force the pointer live and still use ivopts;
2934      for example, it would be enough to write the pointer into memory
2935      and keep it there until after the loop.  */
2936   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2937     return NULL;
2938
2939   /* For non-original variables, make sure their values are computed in a type
2940      that does not invoke undefined behavior on overflows (since in general,
2941      we cannot prove that these induction variables are non-wrapping).  */
2942   if (pos != IP_ORIGINAL)
2943     {
2944       orig_type = TREE_TYPE (base);
2945       type = generic_type_for (orig_type);
2946       if (type != orig_type)
2947         {
2948           base = fold_convert (type, base);
2949           step = fold_convert (type, step);
2950         }
2951     }
2952
2953   for (i = 0; i < data->vcands.length (); i++)
2954     {
2955       cand = data->vcands[i];
2956
2957       if (cand->pos != pos)
2958         continue;
2959
2960       if (cand->incremented_at != incremented_at
2961           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2962               && cand->ainc_use != use))
2963         continue;
2964
2965       if (operand_equal_p (base, cand->iv->base, 0)
2966           && operand_equal_p (step, cand->iv->step, 0)
2967           && (TYPE_PRECISION (TREE_TYPE (base))
2968               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2969         break;
2970     }
2971
2972   if (i == data->vcands.length ())
2973     {
2974       cand = XCNEW (struct iv_cand);
2975       cand->id = i;
2976       cand->iv = alloc_iv (data, base, step);
2977       cand->pos = pos;
2978       if (pos != IP_ORIGINAL)
2979         {
2980           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2981           cand->var_after = cand->var_before;
2982         }
2983       cand->important = important;
2984       cand->incremented_at = incremented_at;
2985       data->vcands.safe_push (cand);
2986
2987       if (TREE_CODE (step) != INTEGER_CST)
2988         {
2989           fd_ivopts_data = data;
2990           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2991         }
2992
2993       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2994         cand->ainc_use = use;
2995       else
2996         cand->ainc_use = NULL;
2997
2998       cand->orig_iv = orig_iv;
2999       if (dump_file && (dump_flags & TDF_DETAILS))
3000         dump_cand (dump_file, cand);
3001     }
3002
3003   cand->important |= important;
3004
3005   /* Relate candidate to the group for which it is added.  */
3006   if (use)
3007     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3008
3009   return cand;
3010 }
3011
3012 /* Returns true if incrementing the induction variable at the end of the LOOP
3013    is allowed.
3014
3015    The purpose is to avoid splitting latch edge with a biv increment, thus
3016    creating a jump, possibly confusing other optimization passes and leaving
3017    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
3018    is not available (so we do not have a better alternative), or if the latch
3019    edge is already nonempty.  */
3020
3021 static bool
3022 allow_ip_end_pos_p (struct loop *loop)
3023 {
3024   if (!ip_normal_pos (loop))
3025     return true;
3026
3027   if (!empty_block_p (ip_end_pos (loop)))
3028     return true;
3029
3030   return false;
3031 }
3032
3033 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3034    Important field is set to IMPORTANT.  */
3035
3036 static void
3037 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3038                         bool important, struct iv_use *use)
3039 {
3040   basic_block use_bb = gimple_bb (use->stmt);
3041   machine_mode mem_mode;
3042   unsigned HOST_WIDE_INT cstepi;
3043
3044   /* If we insert the increment in any position other than the standard
3045      ones, we must ensure that it is incremented once per iteration.
3046      It must not be in an inner nested loop, or one side of an if
3047      statement.  */
3048   if (use_bb->loop_father != data->current_loop
3049       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3050       || stmt_could_throw_p (use->stmt)
3051       || !cst_and_fits_in_hwi (step))
3052     return;
3053
3054   cstepi = int_cst_value (step);
3055
3056   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
3057   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3058         || USE_STORE_PRE_INCREMENT (mem_mode))
3059        && GET_MODE_SIZE (mem_mode) == cstepi)
3060       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3061            || USE_STORE_PRE_DECREMENT (mem_mode))
3062           && GET_MODE_SIZE (mem_mode) == -cstepi))
3063     {
3064       enum tree_code code = MINUS_EXPR;
3065       tree new_base;
3066       tree new_step = step;
3067
3068       if (POINTER_TYPE_P (TREE_TYPE (base)))
3069         {
3070           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3071           code = POINTER_PLUS_EXPR;
3072         }
3073       else
3074         new_step = fold_convert (TREE_TYPE (base), new_step);
3075       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3076       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3077                        use->stmt);
3078     }
3079   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3080         || USE_STORE_POST_INCREMENT (mem_mode))
3081        && GET_MODE_SIZE (mem_mode) == cstepi)
3082       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3083            || USE_STORE_POST_DECREMENT (mem_mode))
3084           && GET_MODE_SIZE (mem_mode) == -cstepi))
3085     {
3086       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3087                        use->stmt);
3088     }
3089 }
3090
3091 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3092    position to POS.  If USE is not NULL, the candidate is set as related to
3093    it.  The candidate computation is scheduled before exit condition and at
3094    the end of loop.  */
3095
3096 static void
3097 add_candidate (struct ivopts_data *data,
3098                tree base, tree step, bool important, struct iv_use *use,
3099                struct iv *orig_iv = NULL)
3100 {
3101   if (ip_normal_pos (data->current_loop))
3102     add_candidate_1 (data, base, step, important,
3103                      IP_NORMAL, use, NULL, orig_iv);
3104   if (ip_end_pos (data->current_loop)
3105       && allow_ip_end_pos_p (data->current_loop))
3106     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3107 }
3108
3109 /* Adds standard iv candidates.  */
3110
3111 static void
3112 add_standard_iv_candidates (struct ivopts_data *data)
3113 {
3114   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3115
3116   /* The same for a double-integer type if it is still fast enough.  */
3117   if (TYPE_PRECISION
3118         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3119       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3120     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3121                    build_int_cst (long_integer_type_node, 1), true, NULL);
3122
3123   /* The same for a double-integer type if it is still fast enough.  */
3124   if (TYPE_PRECISION
3125         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3126       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3127     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3128                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
3129 }
3130
3131
3132 /* Adds candidates bases on the old induction variable IV.  */
3133
3134 static void
3135 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3136 {
3137   gimple *phi;
3138   tree def;
3139   struct iv_cand *cand;
3140
3141   /* Check if this biv is used in address type use.  */
3142   if (iv->no_overflow  && iv->have_address_use
3143       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3144       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3145     {
3146       tree base = fold_convert (sizetype, iv->base);
3147       tree step = fold_convert (sizetype, iv->step);
3148
3149       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3150       add_candidate (data, base, step, true, NULL, iv);
3151       /* Add iv cand of the original type only if it has nonlinear use.  */
3152       if (iv->nonlin_use)
3153         add_candidate (data, iv->base, iv->step, true, NULL);
3154     }
3155   else
3156     add_candidate (data, iv->base, iv->step, true, NULL);
3157
3158   /* The same, but with initial value zero.  */
3159   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3160     add_candidate (data, size_int (0), iv->step, true, NULL);
3161   else
3162     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3163                    iv->step, true, NULL);
3164
3165   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3166   if (gimple_code (phi) == GIMPLE_PHI)
3167     {
3168       /* Additionally record the possibility of leaving the original iv
3169          untouched.  */
3170       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3171       /* Don't add candidate if it's from another PHI node because
3172          it's an affine iv appearing in the form of PEELED_CHREC.  */
3173       phi = SSA_NAME_DEF_STMT (def);
3174       if (gimple_code (phi) != GIMPLE_PHI)
3175         {
3176           cand = add_candidate_1 (data,
3177                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
3178                                   SSA_NAME_DEF_STMT (def));
3179           if (cand)
3180             {
3181               cand->var_before = iv->ssa_name;
3182               cand->var_after = def;
3183             }
3184         }
3185       else
3186         gcc_assert (gimple_bb (phi) == data->current_loop->header);
3187     }
3188 }
3189
3190 /* Adds candidates based on the old induction variables.  */
3191
3192 static void
3193 add_iv_candidate_for_bivs (struct ivopts_data *data)
3194 {
3195   unsigned i;
3196   struct iv *iv;
3197   bitmap_iterator bi;
3198
3199   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3200     {
3201       iv = ver_info (data, i)->iv;
3202       if (iv && iv->biv_p && !integer_zerop (iv->step))
3203         add_iv_candidate_for_biv (data, iv);
3204     }
3205 }
3206
3207 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3208
3209 static void
3210 record_common_cand (struct ivopts_data *data, tree base,
3211                     tree step, struct iv_use *use)
3212 {
3213   struct iv_common_cand ent;
3214   struct iv_common_cand **slot;
3215
3216   ent.base = base;
3217   ent.step = step;
3218   ent.hash = iterative_hash_expr (base, 0);
3219   ent.hash = iterative_hash_expr (step, ent.hash);
3220
3221   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3222   if (*slot == NULL)
3223     {
3224       *slot = new iv_common_cand ();
3225       (*slot)->base = base;
3226       (*slot)->step = step;
3227       (*slot)->uses.create (8);
3228       (*slot)->hash = ent.hash;
3229       data->iv_common_cands.safe_push ((*slot));
3230     }
3231
3232   gcc_assert (use != NULL);
3233   (*slot)->uses.safe_push (use);
3234   return;
3235 }
3236
3237 /* Comparison function used to sort common candidates.  */
3238
3239 static int
3240 common_cand_cmp (const void *p1, const void *p2)
3241 {
3242   unsigned n1, n2;
3243   const struct iv_common_cand *const *const ccand1
3244     = (const struct iv_common_cand *const *)p1;
3245   const struct iv_common_cand *const *const ccand2
3246     = (const struct iv_common_cand *const *)p2;
3247
3248   n1 = (*ccand1)->uses.length ();
3249   n2 = (*ccand2)->uses.length ();
3250   return n2 - n1;
3251 }
3252
3253 /* Adds IV candidates based on common candidated recorded.  */
3254
3255 static void
3256 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3257 {
3258   unsigned i, j;
3259   struct iv_cand *cand_1, *cand_2;
3260
3261   data->iv_common_cands.qsort (common_cand_cmp);
3262   for (i = 0; i < data->iv_common_cands.length (); i++)
3263     {
3264       struct iv_common_cand *ptr = data->iv_common_cands[i];
3265
3266       /* Only add IV candidate if it's derived from multiple uses.  */
3267       if (ptr->uses.length () <= 1)
3268         break;
3269
3270       cand_1 = NULL;
3271       cand_2 = NULL;
3272       if (ip_normal_pos (data->current_loop))
3273         cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3274                                   false, IP_NORMAL, NULL, NULL);
3275
3276       if (ip_end_pos (data->current_loop)
3277           && allow_ip_end_pos_p (data->current_loop))
3278         cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3279                                   false, IP_END, NULL, NULL);
3280
3281       /* Bind deriving uses and the new candidates.  */
3282       for (j = 0; j < ptr->uses.length (); j++)
3283         {
3284           struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3285           if (cand_1)
3286             bitmap_set_bit (group->related_cands, cand_1->id);
3287           if (cand_2)
3288             bitmap_set_bit (group->related_cands, cand_2->id);
3289         }
3290     }
3291
3292   /* Release data since it is useless from this point.  */
3293   data->iv_common_cand_tab->empty ();
3294   data->iv_common_cands.truncate (0);
3295 }
3296
3297 /* Adds candidates based on the value of USE's iv.  */
3298
3299 static void
3300 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3301 {
3302   unsigned HOST_WIDE_INT offset;
3303   tree base;
3304   tree basetype;
3305   struct iv *iv = use->iv;
3306
3307   add_candidate (data, iv->base, iv->step, false, use);
3308
3309   /* Record common candidate for use in case it can be shared by others.  */
3310   record_common_cand (data, iv->base, iv->step, use);
3311
3312   /* Record common candidate with initial value zero.  */
3313   basetype = TREE_TYPE (iv->base);
3314   if (POINTER_TYPE_P (basetype))
3315     basetype = sizetype;
3316   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3317
3318   /* Record common candidate with constant offset stripped in base.
3319      Like the use itself, we also add candidate directly for it.  */
3320   base = strip_offset (iv->base, &offset);
3321   if (offset || base != iv->base)
3322     {
3323       record_common_cand (data, base, iv->step, use);
3324       add_candidate (data, base, iv->step, false, use);
3325     }
3326
3327   /* Record common candidate with base_object removed in base.  */
3328   if (iv->base_object != NULL)
3329     {
3330       unsigned i;
3331       aff_tree aff_base;
3332       tree step, base_object = iv->base_object;
3333
3334       base = iv->base;
3335       step = iv->step;
3336       STRIP_NOPS (base);
3337       STRIP_NOPS (step);
3338       STRIP_NOPS (base_object);
3339       tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
3340       for (i = 0; i < aff_base.n; i++)
3341         {
3342           if (aff_base.elts[i].coef != 1)
3343             continue;
3344
3345           if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3346             break;
3347         }
3348       if (i < aff_base.n)
3349         {
3350           aff_combination_remove_elt (&aff_base, i);
3351           base = aff_combination_to_tree (&aff_base);
3352           basetype = TREE_TYPE (base);
3353           if (POINTER_TYPE_P (basetype))
3354             basetype = sizetype;
3355
3356           step = fold_convert (basetype, step);
3357           record_common_cand (data, base, step, use);
3358           /* Also record common candidate with offset stripped.  */
3359           base = strip_offset (base, &offset);
3360           if (offset)
3361             record_common_cand (data, base, step, use);
3362         }
3363     }
3364
3365   /* At last, add auto-incremental candidates.  Make such variables
3366      important since other iv uses with same base object may be based
3367      on it.  */
3368   if (use != NULL && use->type == USE_ADDRESS)
3369     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3370 }
3371
3372 /* Adds candidates based on the uses.  */
3373
3374 static void
3375 add_iv_candidate_for_groups (struct ivopts_data *data)
3376 {
3377   unsigned i;
3378
3379   /* Only add candidate for the first use in group.  */
3380   for (i = 0; i < data->vgroups.length (); i++)
3381     {
3382       struct iv_group *group = data->vgroups[i];
3383
3384       gcc_assert (group->vuses[0] != NULL);
3385       add_iv_candidate_for_use (data, group->vuses[0]);
3386     }
3387   add_iv_candidate_derived_from_uses (data);
3388 }
3389
3390 /* Record important candidates and add them to related_cands bitmaps.  */
3391
3392 static void
3393 record_important_candidates (struct ivopts_data *data)
3394 {
3395   unsigned i;
3396   struct iv_group *group;
3397
3398   for (i = 0; i < data->vcands.length (); i++)
3399     {
3400       struct iv_cand *cand = data->vcands[i];
3401
3402       if (cand->important)
3403         bitmap_set_bit (data->important_candidates, i);
3404     }
3405
3406   data->consider_all_candidates = (data->vcands.length ()
3407                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
3408
3409   /* Add important candidates to groups' related_cands bitmaps.  */
3410   for (i = 0; i < data->vgroups.length (); i++)
3411     {
3412       group = data->vgroups[i];
3413       bitmap_ior_into (group->related_cands, data->important_candidates);
3414     }
3415 }
3416
3417 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3418    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3419    we allocate a simple list to every use.  */
3420
3421 static void
3422 alloc_use_cost_map (struct ivopts_data *data)
3423 {
3424   unsigned i, size, s;
3425
3426   for (i = 0; i < data->vgroups.length (); i++)
3427     {
3428       struct iv_group *group = data->vgroups[i];
3429
3430       if (data->consider_all_candidates)
3431         size = data->vcands.length ();
3432       else
3433         {
3434           s = bitmap_count_bits (group->related_cands);
3435
3436           /* Round up to the power of two, so that moduling by it is fast.  */
3437           size = s ? (1 << ceil_log2 (s)) : 1;
3438         }
3439
3440       group->n_map_members = size;
3441       group->cost_map = XCNEWVEC (struct cost_pair, size);
3442     }
3443 }
3444
3445 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3446    on invariants DEPENDS_ON and that the value used in expressing it
3447    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
3448
3449 static void
3450 set_group_iv_cost (struct ivopts_data *data,
3451                    struct iv_group *group, struct iv_cand *cand,
3452                    comp_cost cost, bitmap depends_on, tree value,
3453                    enum tree_code comp, iv_inv_expr_ent *inv_expr)
3454 {
3455   unsigned i, s;
3456
3457   if (cost.infinite_cost_p ())
3458     {
3459       BITMAP_FREE (depends_on);
3460       return;
3461     }
3462
3463   if (data->consider_all_candidates)
3464     {
3465       group->cost_map[cand->id].cand = cand;
3466       group->cost_map[cand->id].cost = cost;
3467       group->cost_map[cand->id].depends_on = depends_on;
3468       group->cost_map[cand->id].value = value;
3469       group->cost_map[cand->id].comp = comp;
3470       group->cost_map[cand->id].inv_expr = inv_expr;
3471       return;
3472     }
3473
3474   /* n_map_members is a power of two, so this computes modulo.  */
3475   s = cand->id & (group->n_map_members - 1);
3476   for (i = s; i < group->n_map_members; i++)
3477     if (!group->cost_map[i].cand)
3478       goto found;
3479   for (i = 0; i < s; i++)
3480     if (!group->cost_map[i].cand)
3481       goto found;
3482
3483   gcc_unreachable ();
3484
3485 found:
3486   group->cost_map[i].cand = cand;
3487   group->cost_map[i].cost = cost;
3488   group->cost_map[i].depends_on = depends_on;
3489   group->cost_map[i].value = value;
3490   group->cost_map[i].comp = comp;
3491   group->cost_map[i].inv_expr = inv_expr;
3492 }
3493
3494 /* Gets cost of (GROUP, CAND) pair.  */
3495
3496 static struct cost_pair *
3497 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3498                    struct iv_cand *cand)
3499 {
3500   unsigned i, s;
3501   struct cost_pair *ret;
3502
3503   if (!cand)
3504     return NULL;
3505
3506   if (data->consider_all_candidates)
3507     {
3508       ret = group->cost_map + cand->id;
3509       if (!ret->cand)
3510         return NULL;
3511
3512       return ret;
3513     }
3514
3515   /* n_map_members is a power of two, so this computes modulo.  */
3516   s = cand->id & (group->n_map_members - 1);
3517   for (i = s; i < group->n_map_members; i++)
3518     if (group->cost_map[i].cand == cand)
3519       return group->cost_map + i;
3520     else if (group->cost_map[i].cand == NULL)
3521       return NULL;
3522   for (i = 0; i < s; i++)
3523     if (group->cost_map[i].cand == cand)
3524       return group->cost_map + i;
3525     else if (group->cost_map[i].cand == NULL)
3526       return NULL;
3527
3528   return NULL;
3529 }
3530
3531 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3532 static rtx
3533 produce_memory_decl_rtl (tree obj, int *regno)
3534 {
3535   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3536   machine_mode address_mode = targetm.addr_space.address_mode (as);
3537   rtx x;
3538
3539   gcc_assert (obj);
3540   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3541     {
3542       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3543       x = gen_rtx_SYMBOL_REF (address_mode, name);
3544       SET_SYMBOL_REF_DECL (x, obj);
3545       x = gen_rtx_MEM (DECL_MODE (obj), x);
3546       set_mem_addr_space (x, as);
3547       targetm.encode_section_info (obj, x, true);
3548     }
3549   else
3550     {
3551       x = gen_raw_REG (address_mode, (*regno)++);
3552       x = gen_rtx_MEM (DECL_MODE (obj), x);
3553       set_mem_addr_space (x, as);
3554     }
3555
3556   return x;
3557 }
3558
3559 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3560    walk_tree.  DATA contains the actual fake register number.  */
3561
3562 static tree
3563 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3564 {
3565   tree obj = NULL_TREE;
3566   rtx x = NULL_RTX;
3567   int *regno = (int *) data;
3568
3569   switch (TREE_CODE (*expr_p))
3570     {
3571     case ADDR_EXPR:
3572       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3573            handled_component_p (*expr_p);
3574            expr_p = &TREE_OPERAND (*expr_p, 0))
3575         continue;
3576       obj = *expr_p;
3577       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3578         x = produce_memory_decl_rtl (obj, regno);
3579       break;
3580
3581     case SSA_NAME:
3582       *ws = 0;
3583       obj = SSA_NAME_VAR (*expr_p);
3584       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3585       if (!obj)
3586         return NULL_TREE;
3587       if (!DECL_RTL_SET_P (obj))
3588         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3589       break;
3590
3591     case VAR_DECL:
3592     case PARM_DECL:
3593     case RESULT_DECL:
3594       *ws = 0;
3595       obj = *expr_p;
3596
3597       if (DECL_RTL_SET_P (obj))
3598         break;
3599
3600       if (DECL_MODE (obj) == BLKmode)
3601         x = produce_memory_decl_rtl (obj, regno);
3602       else
3603         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3604
3605       break;
3606
3607     default:
3608       break;
3609     }
3610
3611   if (x)
3612     {
3613       decl_rtl_to_reset.safe_push (obj);
3614       SET_DECL_RTL (obj, x);
3615     }
3616
3617   return NULL_TREE;
3618 }
3619
3620 /* Determines cost of the computation of EXPR.  */
3621
3622 static unsigned
3623 computation_cost (tree expr, bool speed)
3624 {
3625   rtx_insn *seq;
3626   rtx rslt;
3627   tree type = TREE_TYPE (expr);
3628   unsigned cost;
3629   /* Avoid using hard regs in ways which may be unsupported.  */
3630   int regno = LAST_VIRTUAL_REGISTER + 1;
3631   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3632   enum node_frequency real_frequency = node->frequency;
3633
3634   node->frequency = NODE_FREQUENCY_NORMAL;
3635   crtl->maybe_hot_insn_p = speed;
3636   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3637   start_sequence ();
3638   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3639   seq = get_insns ();
3640   end_sequence ();
3641   default_rtl_profile ();
3642   node->frequency = real_frequency;
3643
3644   cost = seq_cost (seq, speed);
3645   if (MEM_P (rslt))
3646     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3647                           TYPE_ADDR_SPACE (type), speed);
3648   else if (!REG_P (rslt))
3649     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3650
3651   return cost;
3652 }
3653
3654 /* Returns variable containing the value of candidate CAND at statement AT.  */
3655
3656 static tree
3657 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3658 {
3659   if (stmt_after_increment (loop, cand, stmt))
3660     return cand->var_after;
3661   else
3662     return cand->var_before;
3663 }
3664
3665 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3666    same precision that is at least as wide as the precision of TYPE, stores
3667    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3668    type of A and B.  */
3669
3670 static tree
3671 determine_common_wider_type (tree *a, tree *b)
3672 {
3673   tree wider_type = NULL;
3674   tree suba, subb;
3675   tree atype = TREE_TYPE (*a);
3676
3677   if (CONVERT_EXPR_P (*a))
3678     {
3679       suba = TREE_OPERAND (*a, 0);
3680       wider_type = TREE_TYPE (suba);
3681       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3682         return atype;
3683     }
3684   else
3685     return atype;
3686
3687   if (CONVERT_EXPR_P (*b))
3688     {
3689       subb = TREE_OPERAND (*b, 0);
3690       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3691         return atype;
3692     }
3693   else
3694     return atype;
3695
3696   *a = suba;
3697   *b = subb;
3698   return wider_type;
3699 }
3700
3701 /* Determines the expression by that USE is expressed from induction variable
3702    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3703    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3704
3705 static bool
3706 get_computation_aff (struct loop *loop,
3707                      struct iv_use *use, struct iv_cand *cand, gimple *at,
3708                      struct aff_tree *aff)
3709 {
3710   tree ubase = use->iv->base;
3711   tree ustep = use->iv->step;
3712   tree cbase = cand->iv->base;
3713   tree cstep = cand->iv->step, cstep_common;
3714   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3715   tree common_type, var;
3716   tree uutype;
3717   aff_tree cbase_aff, var_aff;
3718   widest_int rat;
3719
3720   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3721     {
3722       /* We do not have a precision to express the values of use.  */
3723       return false;
3724     }
3725
3726   var = var_at_stmt (loop, cand, at);
3727   uutype = unsigned_type_for (utype);
3728
3729   /* If the conversion is not noop, perform it.  */
3730   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3731     {
3732       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3733           && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3734         {
3735           tree inner_base, inner_step, inner_type;
3736           inner_base = TREE_OPERAND (cbase, 0);
3737           if (CONVERT_EXPR_P (cstep))
3738             inner_step = TREE_OPERAND (cstep, 0);
3739           else
3740             inner_step = cstep;
3741
3742           inner_type = TREE_TYPE (inner_base);
3743           /* If candidate is added from a biv whose type is smaller than
3744              ctype, we know both candidate and the biv won't overflow.
3745              In this case, it's safe to skip the convertion in candidate.
3746              As an example, (unsigned short)((unsigned long)A) equals to
3747              (unsigned short)A, if A has a type no larger than short.  */
3748           if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3749             {
3750               cbase = inner_base;
3751               cstep = inner_step;
3752             }
3753         }
3754       cstep = fold_convert (uutype, cstep);
3755       cbase = fold_convert (uutype, cbase);
3756       var = fold_convert (uutype, var);
3757     }
3758
3759   /* Ratio is 1 when computing the value of biv cand by itself.
3760      We can't rely on constant_multiple_of in this case because the
3761      use is created after the original biv is selected.  The call
3762      could fail because of inconsistent fold behavior.  See PR68021
3763      for more information.  */
3764   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3765     {
3766       gcc_assert (is_gimple_assign (use->stmt));
3767       gcc_assert (use->iv->ssa_name == cand->var_after);
3768       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3769       rat = 1;
3770     }
3771   else if (!constant_multiple_of (ustep, cstep, &rat))
3772     return false;
3773
3774   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3775      type, we achieve better folding by computing their difference in this
3776      wider type, and cast the result to UUTYPE.  We do not need to worry about
3777      overflows, as all the arithmetics will in the end be performed in UUTYPE
3778      anyway.  */
3779   common_type = determine_common_wider_type (&ubase, &cbase);
3780
3781   /* use = ubase - ratio * cbase + ratio * var.  */
3782   tree_to_aff_combination (ubase, common_type, aff);
3783   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3784   tree_to_aff_combination (var, uutype, &var_aff);
3785
3786   /* We need to shift the value if we are after the increment.  */
3787   if (stmt_after_increment (loop, cand, at))
3788     {
3789       aff_tree cstep_aff;
3790
3791       if (common_type != uutype)
3792         cstep_common = fold_convert (common_type, cstep);
3793       else
3794         cstep_common = cstep;
3795
3796       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3797       aff_combination_add (&cbase_aff, &cstep_aff);
3798     }
3799
3800   aff_combination_scale (&cbase_aff, -rat);
3801   aff_combination_add (aff, &cbase_aff);
3802   if (common_type != uutype)
3803     aff_combination_convert (aff, uutype);
3804
3805   aff_combination_scale (&var_aff, rat);
3806   aff_combination_add (aff, &var_aff);
3807
3808   return true;
3809 }
3810
3811 /* Return the type of USE.  */
3812
3813 static tree
3814 get_use_type (struct iv_use *use)
3815 {
3816   tree base_type = TREE_TYPE (use->iv->base);
3817   tree type;
3818
3819   if (use->type == USE_ADDRESS)
3820     {
3821       /* The base_type may be a void pointer.  Create a pointer type based on
3822          the mem_ref instead.  */
3823       type = build_pointer_type (TREE_TYPE (*use->op_p));
3824       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3825                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3826     }
3827   else
3828     type = base_type;
3829
3830   return type;
3831 }
3832
3833 /* Determines the expression by that USE is expressed from induction variable
3834    CAND at statement AT in LOOP.  The computation is unshared.  */
3835
3836 static tree
3837 get_computation_at (struct loop *loop,
3838                     struct iv_use *use, struct iv_cand *cand, gimple *at)
3839 {
3840   aff_tree aff;
3841   tree type = get_use_type (use);
3842
3843   if (!get_computation_aff (loop, use, cand, at, &aff))
3844     return NULL_TREE;
3845   unshare_aff_combination (&aff);
3846   return fold_convert (type, aff_combination_to_tree (&aff));
3847 }
3848
3849 /* Determines the expression by that USE is expressed from induction variable
3850    CAND in LOOP.  The computation is unshared.  */
3851
3852 static tree
3853 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3854 {
3855   return get_computation_at (loop, use, cand, use->stmt);
3856 }
3857
3858 /* Adjust the cost COST for being in loop setup rather than loop body.
3859    If we're optimizing for space, the loop setup overhead is constant;
3860    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3861 static unsigned
3862 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3863 {
3864   if (cost == INFTY)
3865     return cost;
3866   else if (optimize_loop_for_speed_p (data->current_loop))
3867     return cost / avg_loop_niter (data->current_loop);
3868   else
3869     return cost;
3870 }
3871
3872 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3873    validity for a memory reference accessing memory of mode MODE in
3874    address space AS.  */
3875
3876
3877 bool
3878 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3879                                  addr_space_t as)
3880 {
3881 #define MAX_RATIO 128
3882   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3883   static vec<sbitmap> valid_mult_list;
3884   sbitmap valid_mult;
3885
3886   if (data_index >= valid_mult_list.length ())
3887     valid_mult_list.safe_grow_cleared (data_index + 1);
3888
3889   valid_mult = valid_mult_list[data_index];
3890   if (!valid_mult)
3891     {
3892       machine_mode address_mode = targetm.addr_space.address_mode (as);
3893       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3894       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3895       rtx addr, scaled;
3896       HOST_WIDE_INT i;
3897
3898       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3899       bitmap_clear (valid_mult);
3900       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3901       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3902       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3903         {
3904           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3905           if (memory_address_addr_space_p (mode, addr, as)
3906               || memory_address_addr_space_p (mode, scaled, as))
3907             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3908         }
3909
3910       if (dump_file && (dump_flags & TDF_DETAILS))
3911         {
3912           fprintf (dump_file, "  allowed multipliers:");
3913           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3914             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3915               fprintf (dump_file, " %d", (int) i);
3916           fprintf (dump_file, "\n");
3917           fprintf (dump_file, "\n");
3918         }
3919
3920       valid_mult_list[data_index] = valid_mult;
3921     }
3922
3923   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3924     return false;
3925
3926   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3927 }
3928
3929 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3930    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3931    variable is omitted.  Compute the cost for a memory reference that accesses
3932    a memory location of mode MEM_MODE in address space AS.
3933
3934    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3935    size of MEM_MODE / RATIO) is available.  To make this determination, we
3936    look at the size of the increment to be made, which is given in CSTEP.
3937    CSTEP may be zero if the step is unknown.
3938    STMT_AFTER_INC is true iff the statement we're looking at is after the
3939    increment of the original biv.
3940
3941    TODO -- there must be some better way.  This all is quite crude.  */
3942
/* Kinds of auto-increment addressing.  The values are used as indices
   into address_cost_data::ainc_costs, with AINC_NONE doubling as the
   number of real auto-increment kinds.  */
enum ainc_type
{
  AINC_PRE_INC = 0,     /* Pre increment.  */
  AINC_PRE_DEC = 1,     /* Pre decrement.  */
  AINC_POST_INC = 2,    /* Post increment.  */
  AINC_POST_DEC = 3,    /* Post decrement.  */
  AINC_NONE = 4         /* Also the number of auto increment types.  */
};
3951
3952 struct address_cost_data
3953 {
3954   HOST_WIDE_INT min_offset, max_offset;
3955   unsigned costs[2][2][2][2];
3956   unsigned ainc_costs[AINC_NONE];
3957 };
3958
3959
3960 static comp_cost
3961 get_address_cost (bool symbol_present, bool var_present,
3962                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3963                   HOST_WIDE_INT cstep, machine_mode mem_mode,
3964                   addr_space_t as, bool speed,
3965                   bool stmt_after_inc, bool *may_autoinc)
3966 {
3967   machine_mode address_mode = targetm.addr_space.address_mode (as);
3968   static vec<address_cost_data *> address_cost_data_list;
3969   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3970   address_cost_data *data;
3971   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3972   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3973   unsigned cost, acost, complexity;
3974   enum ainc_type autoinc_type;
3975   bool offset_p, ratio_p, autoinc;
3976   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3977   unsigned HOST_WIDE_INT mask;
3978   unsigned bits;
3979
3980   if (data_index >= address_cost_data_list.length ())
3981     address_cost_data_list.safe_grow_cleared (data_index + 1);
3982
3983   data = address_cost_data_list[data_index];
3984   if (!data)
3985     {
3986       HOST_WIDE_INT i;
3987       HOST_WIDE_INT rat, off = 0;
3988       int old_cse_not_expected, width;
3989       unsigned sym_p, var_p, off_p, rat_p, add_c;
3990       rtx_insn *seq;
3991       rtx addr, base;
3992       rtx reg0, reg1;
3993
3994       data = (address_cost_data *) xcalloc (1, sizeof (*data));
3995
3996       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3997
3998       width = GET_MODE_BITSIZE (address_mode) - 1;
3999       if (width > (HOST_BITS_PER_WIDE_INT - 1))
4000         width = HOST_BITS_PER_WIDE_INT - 1;
4001       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
4002
4003       for (i = width; i >= 0; i--)
4004         {
4005           off = -(HOST_WIDE_INT_1U << i);
4006           XEXP (addr, 1) = gen_int_mode (off, address_mode);
4007           if (memory_address_addr_space_p (mem_mode, addr, as))
4008             break;
4009         }
4010       data->min_offset = (i == -1? 0 : off);
4011
4012       for (i = width; i >= 0; i--)
4013         {
4014           off = (HOST_WIDE_INT_1U << i) - 1;
4015           XEXP (addr, 1) = gen_int_mode (off, address_mode);
4016           if (memory_address_addr_space_p (mem_mode, addr, as))
4017             break;
4018           /* For some strict-alignment targets, the offset must be naturally
4019              aligned.  Try an aligned offset if mem_mode is not QImode.  */
4020           off = mem_mode != QImode
4021                 ? (HOST_WIDE_INT_1U << i)
4022                     - GET_MODE_SIZE (mem_mode)
4023                 : 0;
4024           if (off > 0)
4025             {
4026               XEXP (addr, 1) = gen_int_mode (off, address_mode);
4027               if (memory_address_addr_space_p (mem_mode, addr, as))
4028                 break;
4029             }
4030         }
4031       if (i == -1)
4032         off = 0;
4033       data->max_offset = off;
4034
4035       if (dump_file && (dump_flags & TDF_DETAILS))
4036         {
4037           fprintf (dump_file, "get_address_cost:\n");
4038           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4039                    GET_MODE_NAME (mem_mode),
4040                    data->min_offset);
4041           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4042                    GET_MODE_NAME (mem_mode),
4043                    data->max_offset);
4044         }
4045
4046       rat = 1;
4047       for (i = 2; i <= MAX_RATIO; i++)
4048         if (multiplier_allowed_in_address_p (i, mem_mode, as))
4049           {
4050             rat = i;
4051             break;
4052           }
4053
4054       /* Compute the cost of various addressing modes.  */
4055       acost = 0;
4056       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4057       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4058
4059       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4060           || USE_STORE_PRE_DECREMENT (mem_mode))
4061         {
4062           addr = gen_rtx_PRE_DEC (address_mode, reg0);
4063           has_predec[mem_mode]
4064             = memory_address_addr_space_p (mem_mode, addr, as);
4065
4066           if (has_predec[mem_mode])
4067             data->ainc_costs[AINC_PRE_DEC]
4068               = address_cost (addr, mem_mode, as, speed);
4069         }
4070       if (USE_LOAD_POST_DECREMENT (mem_mode)
4071           || USE_STORE_POST_DECREMENT (mem_mode))
4072         {
4073           addr = gen_rtx_POST_DEC (address_mode, reg0);
4074           has_postdec[mem_mode]
4075             = memory_address_addr_space_p (mem_mode, addr, as);
4076
4077           if (has_postdec[mem_mode])
4078             data->ainc_costs[AINC_POST_DEC]
4079               = address_cost (addr, mem_mode, as, speed);
4080         }
4081       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4082           || USE_STORE_PRE_DECREMENT (mem_mode))
4083         {
4084           addr = gen_rtx_PRE_INC (address_mode, reg0);
4085           has_preinc[mem_mode]
4086             = memory_address_addr_space_p (mem_mode, addr, as);
4087
4088           if (has_preinc[mem_mode])
4089             data->ainc_costs[AINC_PRE_INC]
4090               = address_cost (addr, mem_mode, as, speed);
4091         }
4092       if (USE_LOAD_POST_INCREMENT (mem_mode)
4093           || USE_STORE_POST_INCREMENT (mem_mode))
4094         {
4095           addr = gen_rtx_POST_INC (address_mode, reg0);
4096           has_postinc[mem_mode]
4097             = memory_address_addr_space_p (mem_mode, addr, as);
4098
4099           if (has_postinc[mem_mode])
4100             data->ainc_costs[AINC_POST_INC]
4101               = address_cost (addr, mem_mode, as, speed);
4102         }
4103       for (i = 0; i < 16; i++)
4104         {
4105           sym_p = i & 1;
4106           var_p = (i >> 1) & 1;
4107           off_p = (i >> 2) & 1;
4108           rat_p = (i >> 3) & 1;
4109
4110           addr = reg0;
4111           if (rat_p)
4112             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4113                                    gen_int_mode (rat, address_mode));
4114
4115           if (var_p)
4116             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4117
4118           if (sym_p)
4119             {
4120               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4121               /* ??? We can run into trouble with some backends by presenting
4122                  it with symbols which haven't been properly passed through
4123                  targetm.encode_section_info.  By setting the local bit, we
4124                  enhance the probability of things working.  */
4125               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4126
4127               if (off_p)
4128                 base = gen_rtx_fmt_e (CONST, address_mode,
4129                                       gen_rtx_fmt_ee
4130                                         (PLUS, address_mode, base,
4131                                          gen_int_mode (off, address_mode)));
4132             }
4133           else if (off_p)
4134             base = gen_int_mode (off, address_mode);
4135           else
4136             base = NULL_RTX;
4137
4138           if (base)
4139             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4140
4141           start_sequence ();
4142           /* To avoid splitting addressing modes, pretend that no cse will
4143              follow.  */
4144           old_cse_not_expected = cse_not_expected;
4145           cse_not_expected = true;
4146           addr = memory_address_addr_space (mem_mode, addr, as);
4147           cse_not_expected = old_cse_not_expected;
4148           seq = get_insns ();
4149           end_sequence ();
4150
4151           acost = seq_cost (seq, speed);
4152           acost += address_cost (addr, mem_mode, as, speed);
4153
4154           if (!acost)
4155             acost = 1;
4156           data->costs[sym_p][var_p][off_p][rat_p] = acost;
4157         }
4158
4159       /* On some targets, it is quite expensive to load symbol to a register,
4160          which makes addresses that contain symbols look much more expensive.
4161          However, the symbol will have to be loaded in any case before the
4162          loop (and quite likely we have it in register already), so it does not
4163          make much sense to penalize them too heavily.  So make some final
4164          tweaks for the SYMBOL_PRESENT modes:
4165
4166          If VAR_PRESENT is false, and the mode obtained by changing symbol to
4167          var is cheaper, use this mode with small penalty.
4168          If VAR_PRESENT is true, try whether the mode with
4169          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4170          if this is the case, use it.  */
4171       add_c = add_cost (speed, address_mode);
4172       for (i = 0; i < 8; i++)
4173         {
4174           var_p = i & 1;
4175           off_p = (i >> 1) & 1;
4176           rat_p = (i >> 2) & 1;
4177
4178           acost = data->costs[0][1][off_p][rat_p] + 1;
4179           if (var_p)
4180             acost += add_c;
4181
4182           if (acost < data->costs[1][var_p][off_p][rat_p])
4183             data->costs[1][var_p][off_p][rat_p] = acost;
4184         }
4185
4186       if (dump_file && (dump_flags & TDF_DETAILS))
4187         {
4188           fprintf (dump_file, "<Address Costs>:\n");
4189
4190           for (i = 0; i < 16; i++)
4191             {
4192               sym_p = i & 1;
4193               var_p = (i >> 1) & 1;
4194               off_p = (i >> 2) & 1;
4195               rat_p = (i >> 3) & 1;
4196
4197               fprintf (dump_file, "  ");
4198               if (sym_p)
4199                 fprintf (dump_file, "sym + ");
4200               if (var_p)
4201                 fprintf (dump_file, "var + ");
4202               if (off_p)
4203                 fprintf (dump_file, "cst + ");
4204               if (rat_p)
4205                 fprintf (dump_file, "rat * ");
4206
4207               acost = data->costs[sym_p][var_p][off_p][rat_p];
4208               fprintf (dump_file, "index costs %d\n", acost);
4209             }
4210           if (has_predec[mem_mode] || has_postdec[mem_mode]
4211               || has_preinc[mem_mode] || has_postinc[mem_mode])
4212             fprintf (dump_file, "  May include autoinc/dec\n");
4213           fprintf (dump_file, "\n");
4214         }
4215
4216       address_cost_data_list[data_index] = data;
4217     }
4218
4219   bits = GET_MODE_BITSIZE (address_mode);
4220   mask = ~(HOST_WIDE_INT_M1U << (bits - 1) << 1);
4221   offset &= mask;
4222   if ((offset >> (bits - 1) & 1))
4223     offset |= ~mask;
4224   s_offset = offset;
4225
4226   autoinc = false;
4227   autoinc_type = AINC_NONE;
4228   msize = GET_MODE_SIZE (mem_mode);
4229   autoinc_offset = offset;
4230   if (stmt_after_inc)
4231     autoinc_offset += ratio * cstep;
4232   if (symbol_present || var_present || ratio != 1)
4233     autoinc = false;
4234   else
4235     {
4236       if (has_postinc[mem_mode] && autoinc_offset == 0
4237           && msize == cstep)
4238         autoinc_type = AINC_POST_INC;
4239       else if (has_postdec[mem_mode] && autoinc_offset == 0
4240                && msize == -cstep)
4241         autoinc_type = AINC_POST_DEC;
4242       else if (has_preinc[mem_mode] && autoinc_offset == msize
4243                && msize == cstep)
4244         autoinc_type = AINC_PRE_INC;
4245       else if (has_predec[mem_mode] && autoinc_offset == -msize
4246                && msize == -cstep)
4247         autoinc_type = AINC_PRE_DEC;
4248
4249       if (autoinc_type != AINC_NONE)
4250         autoinc = true;
4251     }
4252
4253   cost = 0;
4254   offset_p = (s_offset != 0
4255               && data->min_offset <= s_offset
4256               && s_offset <= data->max_offset);
4257   ratio_p = (ratio != 1
4258              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4259
4260   if (ratio != 1 && !ratio_p)
4261     cost += mult_by_coeff_cost (ratio, address_mode, speed);
4262
4263   if (s_offset && !offset_p && !symbol_present)
4264     cost += add_cost (speed, address_mode);
4265
4266   if (may_autoinc)
4267     *may_autoinc = autoinc;
4268   if (autoinc)
4269     acost = data->ainc_costs[autoinc_type];
4270   else
4271     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4272   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4273   return comp_cost (cost + acost, complexity);
4274 }
4275
4276  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4277     EXPR operand holding the shift.  COST0 and COST1 are the costs for
4278     calculating the operands of EXPR.  Returns true if successful, and returns
4279     the cost in COST.  */
4280
4281 static bool
4282 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4283                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4284 {
4285   comp_cost res;
4286   tree op1 = TREE_OPERAND (expr, 1);
4287   tree cst = TREE_OPERAND (mult, 1);
4288   tree multop = TREE_OPERAND (mult, 0);
4289   int m = exact_log2 (int_cst_value (cst));
4290   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4291   int as_cost, sa_cost;
4292   bool mult_in_op1;
4293
4294   if (!(m >= 0 && m < maxm))
4295     return false;
4296
4297   STRIP_NOPS (op1);
4298   mult_in_op1 = operand_equal_p (op1, mult, 0);
4299
4300   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4301
4302   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4303      use that in preference to a shift insn followed by an add insn.  */
4304   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4305              ? shiftadd_cost (speed, mode, m)
4306              : (mult_in_op1
4307                 ? shiftsub1_cost (speed, mode, m)
4308                 : shiftsub0_cost (speed, mode, m)));
4309
4310   res = comp_cost (MIN (as_cost, sa_cost), 0);
4311   res += (mult_in_op1 ? cost0 : cost1);
4312
4313   STRIP_NOPS (multop);
4314   if (!is_gimple_val (multop))
4315     res += force_expr_to_var_cost (multop, speed);
4316
4317   *cost = res;
4318   return true;
4319 }
4320
4321 /* Estimates cost of forcing expression EXPR into a variable.  */
4322
4323 static comp_cost
4324 force_expr_to_var_cost (tree expr, bool speed)
4325 {
4326   static bool costs_initialized = false;
4327   static unsigned integer_cost [2];
4328   static unsigned symbol_cost [2];
4329   static unsigned address_cost [2];
4330   tree op0, op1;
4331   comp_cost cost0, cost1, cost;
4332   machine_mode mode;
4333
4334   if (!costs_initialized)
4335     {
4336       tree type = build_pointer_type (integer_type_node);
4337       tree var, addr;
4338       rtx x;
4339       int i;
4340
4341       var = create_tmp_var_raw (integer_type_node, "test_var");
4342       TREE_STATIC (var) = 1;
4343       x = produce_memory_decl_rtl (var, NULL);
4344       SET_DECL_RTL (var, x);
4345
4346       addr = build1 (ADDR_EXPR, type, var);
4347
4348
4349       for (i = 0; i < 2; i++)
4350         {
4351           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4352                                                              2000), i);
4353
4354           symbol_cost[i] = computation_cost (addr, i) + 1;
4355
4356           address_cost[i]
4357             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4358           if (dump_file && (dump_flags & TDF_DETAILS))
4359             {
4360               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4361               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4362               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4363               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4364               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4365               fprintf (dump_file, "\n");
4366             }
4367         }
4368
4369       costs_initialized = true;
4370     }
4371
4372   STRIP_NOPS (expr);
4373
4374   if (SSA_VAR_P (expr))
4375     return no_cost;
4376
4377   if (is_gimple_min_invariant (expr))
4378     {
4379       if (TREE_CODE (expr) == INTEGER_CST)
4380         return comp_cost (integer_cost [speed], 0);
4381
4382       if (TREE_CODE (expr) == ADDR_EXPR)
4383         {
4384           tree obj = TREE_OPERAND (expr, 0);
4385
4386           if (VAR_P (obj)
4387               || TREE_CODE (obj) == PARM_DECL
4388               || TREE_CODE (obj) == RESULT_DECL)
4389             return comp_cost (symbol_cost [speed], 0);
4390         }
4391
4392       return comp_cost (address_cost [speed], 0);
4393     }
4394
4395   switch (TREE_CODE (expr))
4396     {
4397     case POINTER_PLUS_EXPR:
4398     case PLUS_EXPR:
4399     case MINUS_EXPR:
4400     case MULT_EXPR:
4401       op0 = TREE_OPERAND (expr, 0);
4402       op1 = TREE_OPERAND (expr, 1);
4403       STRIP_NOPS (op0);
4404       STRIP_NOPS (op1);
4405       break;
4406
4407     CASE_CONVERT:
4408     case NEGATE_EXPR:
4409       op0 = TREE_OPERAND (expr, 0);
4410       STRIP_NOPS (op0);
4411       op1 = NULL_TREE;
4412       break;
4413
4414     default:
4415       /* Just an arbitrary value, FIXME.  */
4416       return comp_cost (target_spill_cost[speed], 0);
4417     }
4418
4419   if (op0 == NULL_TREE
4420       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4421     cost0 = no_cost;
4422   else
4423     cost0 = force_expr_to_var_cost (op0, speed);
4424
4425   if (op1 == NULL_TREE
4426       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4427     cost1 = no_cost;
4428   else
4429     cost1 = force_expr_to_var_cost (op1, speed);
4430
4431   mode = TYPE_MODE (TREE_TYPE (expr));
4432   switch (TREE_CODE (expr))
4433     {
4434     case POINTER_PLUS_EXPR:
4435     case PLUS_EXPR:
4436     case MINUS_EXPR:
4437     case NEGATE_EXPR:
4438       cost = comp_cost (add_cost (speed, mode), 0);
4439       if (TREE_CODE (expr) != NEGATE_EXPR)
4440         {
4441           tree mult = NULL_TREE;
4442           comp_cost sa_cost;
4443           if (TREE_CODE (op1) == MULT_EXPR)
4444             mult = op1;
4445           else if (TREE_CODE (op0) == MULT_EXPR)
4446             mult = op0;
4447
4448           if (mult != NULL_TREE
4449               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4450               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4451                                     speed, &sa_cost))
4452             return sa_cost;
4453         }
4454       break;
4455
4456     CASE_CONVERT:
4457       {
4458         tree inner_mode, outer_mode;
4459         outer_mode = TREE_TYPE (expr);
4460         inner_mode = TREE_TYPE (op0);
4461         cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4462                                        TYPE_MODE (inner_mode), speed), 0);
4463       }
4464       break;
4465
4466     case MULT_EXPR:
4467       if (cst_and_fits_in_hwi (op0))
4468         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4469                                              mode, speed), 0);
4470       else if (cst_and_fits_in_hwi (op1))
4471         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4472                                              mode, speed), 0);
4473       else
4474         return comp_cost (target_spill_cost [speed], 0);
4475       break;
4476
4477     default:
4478       gcc_unreachable ();
4479     }
4480
4481   cost += cost0;
4482   cost += cost1;
4483
4484   /* Bound the cost by target_spill_cost.  The parts of complicated
4485      computations often are either loop invariant or at least can
4486      be shared between several iv uses, so letting this grow without
4487      limits would not give reasonable results.  */
4488   if (cost.cost > (int) target_spill_cost [speed])
4489     cost.cost = target_spill_cost [speed];
4490
4491   return cost;
4492 }
4493
4494 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
4495    invariants the computation depends on.  */
4496
4497 static comp_cost
4498 force_var_cost (struct ivopts_data *data,
4499                 tree expr, bitmap *depends_on)
4500 {
4501   if (depends_on)
4502     {
4503       fd_ivopts_data = data;
4504       walk_tree (&expr, find_depends, depends_on, NULL);
4505     }
4506
4507   return force_expr_to_var_cost (expr, data->speed);
4508 }
4509
4510 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
4511    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4512    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
4513    invariants the computation depends on.  */
4514
4515 static comp_cost
4516 split_address_cost (struct ivopts_data *data,
4517                     tree addr, bool *symbol_present, bool *var_present,
4518                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4519 {
4520   tree core;
4521   HOST_WIDE_INT bitsize;
4522   HOST_WIDE_INT bitpos;
4523   tree toffset;
4524   machine_mode mode;
4525   int unsignedp, reversep, volatilep;
4526
4527   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4528                               &unsignedp, &reversep, &volatilep);
4529
4530   if (toffset != 0
4531       || bitpos % BITS_PER_UNIT != 0
4532       || reversep
4533       || !VAR_P (core))
4534     {
4535       *symbol_present = false;
4536       *var_present = true;
4537       fd_ivopts_data = data;
4538       if (depends_on)
4539         walk_tree (&addr, find_depends, depends_on, NULL);
4540
4541       return comp_cost (target_spill_cost[data->speed], 0);
4542     }
4543
4544   *offset += bitpos / BITS_PER_UNIT;
4545   if (TREE_STATIC (core)
4546       || DECL_EXTERNAL (core))
4547     {
4548       *symbol_present = true;
4549       *var_present = false;
4550       return no_cost;
4551     }
4552
4553   *symbol_present = false;
4554   *var_present = true;
4555   return no_cost;
4556 }
4557
4558 /* Estimates cost of expressing difference of addresses E1 - E2 as
4559    var + symbol + offset.  The value of offset is added to OFFSET,
4560    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4561    part is missing.  DEPENDS_ON is a set of the invariants the computation
4562    depends on.  */
4563
4564 static comp_cost
4565 ptr_difference_cost (struct ivopts_data *data,
4566                      tree e1, tree e2, bool *symbol_present, bool *var_present,
4567                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4568 {
4569   HOST_WIDE_INT diff = 0;
4570   aff_tree aff_e1, aff_e2;
4571   tree type;
4572
4573   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4574
4575   if (ptr_difference_const (e1, e2, &diff))
4576     {
4577       *offset += diff;
4578       *symbol_present = false;
4579       *var_present = false;
4580       return no_cost;
4581     }
4582
4583   if (integer_zerop (e2))
4584     return split_address_cost (data, TREE_OPERAND (e1, 0),
4585                                symbol_present, var_present, offset, depends_on);
4586
4587   *symbol_present = false;
4588   *var_present = true;
4589
4590   type = signed_type_for (TREE_TYPE (e1));
4591   tree_to_aff_combination (e1, type, &aff_e1);
4592   tree_to_aff_combination (e2, type, &aff_e2);
4593   aff_combination_scale (&aff_e2, -1);
4594   aff_combination_add (&aff_e1, &aff_e2);
4595
4596   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4597 }
4598
4599 /* Estimates cost of expressing difference E1 - E2 as
4600    var + symbol + offset.  The value of offset is added to OFFSET,
4601    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4602    part is missing.  DEPENDS_ON is a set of the invariants the computation
4603    depends on.  */
4604
4605 static comp_cost
4606 difference_cost (struct ivopts_data *data,
4607                  tree e1, tree e2, bool *symbol_present, bool *var_present,
4608                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4609 {
4610   machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4611   unsigned HOST_WIDE_INT off1, off2;
4612   aff_tree aff_e1, aff_e2;
4613   tree type;
4614
4615   e1 = strip_offset (e1, &off1);
4616   e2 = strip_offset (e2, &off2);
4617   *offset += off1 - off2;
4618
4619   STRIP_NOPS (e1);
4620   STRIP_NOPS (e2);
4621
4622   if (TREE_CODE (e1) == ADDR_EXPR)
4623     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4624                                 offset, depends_on);
4625   *symbol_present = false;
4626
4627   if (operand_equal_p (e1, e2, 0))
4628     {
4629       *var_present = false;
4630       return no_cost;
4631     }
4632
4633   *var_present = true;
4634
4635   if (integer_zerop (e2))
4636     return force_var_cost (data, e1, depends_on);
4637
4638   if (integer_zerop (e1))
4639     {
4640       comp_cost cost = force_var_cost (data, e2, depends_on);
4641       cost += mult_by_coeff_cost (-1, mode, data->speed);
4642       return cost;
4643     }
4644
4645   type = signed_type_for (TREE_TYPE (e1));
4646   tree_to_aff_combination (e1, type, &aff_e1);
4647   tree_to_aff_combination (e2, type, &aff_e2);
4648   aff_combination_scale (&aff_e2, -1);
4649   aff_combination_add (&aff_e1, &aff_e2);
4650
4651   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4652 }
4653
4654 /* Returns true if AFF1 and AFF2 are identical.  */
4655
4656 static bool
4657 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4658 {
4659   unsigned i;
4660
4661   if (aff1->n != aff2->n)
4662     return false;
4663
4664   for (i = 0; i < aff1->n; i++)
4665     {
4666       if (aff1->elts[i].coef != aff2->elts[i].coef)
4667         return false;
4668
4669       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4670         return false;
4671     }
4672   return true;
4673 }
4674
4675 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
4676
4677 static iv_inv_expr_ent *
4678 record_inv_expr (struct ivopts_data *data, tree expr)
4679 {
4680   struct iv_inv_expr_ent ent;
4681   struct iv_inv_expr_ent **slot;
4682
4683   ent.expr = expr;
4684   ent.hash = iterative_hash_expr (expr, 0);
4685   slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4686
4687   if (!*slot)
4688     {
4689       *slot = XNEW (struct iv_inv_expr_ent);
4690       (*slot)->expr = expr;
4691       (*slot)->hash = ent.hash;
4692       (*slot)->id = data->max_inv_expr_id++;
4693     }
4694
4695   return *slot;
4696 }
4697
4698 /* Returns the recorded invariant expression entry if expression
4699    UBASE - RATIO * CBASE requires a new compiler generated temporary.
4700    Returns NULL otherwise.  ADDRESS_P is a flag indicating if the
4701    expression is for address computation.  */
4702
4703 static iv_inv_expr_ent *
4704 get_loop_invariant_expr (struct ivopts_data *data, tree ubase,
4705                          tree cbase, HOST_WIDE_INT ratio,
4706                          bool address_p)
4707 {
4708   aff_tree ubase_aff, cbase_aff;
4709   tree expr, ub, cb;
4710
4711   STRIP_NOPS (ubase);
4712   STRIP_NOPS (cbase);
4713   ub = ubase;  /* UB and CB keep the NOP-stripped bases as they are  */
4714   cb = cbase;  /* before the constant parts are stripped off below.  */
4715
4716   if ((TREE_CODE (ubase) == INTEGER_CST)
4717       && (TREE_CODE (cbase) == INTEGER_CST))
4718     return NULL;
4719
4720   /* Strips the constant part of UBASE.  */
4721   if (TREE_CODE (ubase) == PLUS_EXPR
4722       || TREE_CODE (ubase) == MINUS_EXPR
4723       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4724     {
4725       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4726         ubase = TREE_OPERAND (ubase, 0);
4727     }
4728
4729   /* Strips the constant part of CBASE.  */
4730   if (TREE_CODE (cbase) == PLUS_EXPR
4731       || TREE_CODE (cbase) == MINUS_EXPR
4732       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4733     {
4734       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4735         cbase = TREE_OPERAND (cbase, 0);
4736     }
4737
4738   if (address_p)  /* Name or invariant address against a constant.  */
4739     {
4740       if (((TREE_CODE (ubase) == SSA_NAME)
4741            || (TREE_CODE (ubase) == ADDR_EXPR
4742                && is_gimple_min_invariant (ubase)))
4743           && (TREE_CODE (cbase) == INTEGER_CST))
4744         return NULL;
4745
4746       if (((TREE_CODE (cbase) == SSA_NAME)
4747            || (TREE_CODE (cbase) == ADDR_EXPR
4748                && is_gimple_min_invariant (cbase)))
4749           && (TREE_CODE (ubase) == INTEGER_CST))
4750         return NULL;
4751     }
4752
4753   if (ratio == 1)
4754     {
4755       if (operand_equal_p (ubase, cbase, 0))
4756         return NULL;
4757
4758       if (TREE_CODE (ubase) == ADDR_EXPR
4759           && TREE_CODE (cbase) == ADDR_EXPR)
4760         {
4761           tree usym, csym;
4762
4763           usym = TREE_OPERAND (ubase, 0);
4764           csym = TREE_OPERAND (cbase, 0);
4765           if (TREE_CODE (usym) == ARRAY_REF)  /* Look through &sym[0].  */
4766             {
4767               tree ind = TREE_OPERAND (usym, 1);
4768               if (TREE_CODE (ind) == INTEGER_CST
4769                   && tree_fits_shwi_p (ind)
4770                   && tree_to_shwi (ind) == 0)
4771                 usym = TREE_OPERAND (usym, 0);
4772             }
4773           if (TREE_CODE (csym) == ARRAY_REF)  /* Likewise for CBASE.  */
4774             {
4775               tree ind = TREE_OPERAND (csym, 1);
4776               if (TREE_CODE (ind) == INTEGER_CST
4777                   && tree_fits_shwi_p (ind)
4778                   && tree_to_shwi (ind) == 0)
4779                 csym = TREE_OPERAND (csym, 0);
4780             }
4781           if (operand_equal_p (usym, csym, 0))
4782             return NULL;
4783         }
4784       /* Now do the more complex comparison on the affine combinations.  */
4785       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4786       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4787       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4788         return NULL;
4789     }
4790   /* Otherwise build UB - RATIO * CB from the saved bases and record it.  */
4791   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4792   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4793
4794   aff_combination_scale (&cbase_aff, -1 * ratio);
4795   aff_combination_add (&ubase_aff, &cbase_aff);
4796   expr = aff_combination_to_tree (&ubase_aff);
4797   return record_inv_expr (data, expr);
4798 }
4799
4800 /* Scales the computed COST, except for its scratch part (which should be
4801    hoisted out of the loop), by the ratio AT->bb->frequency /
4802    header->frequency.  CAND is only used in the dump output.  */
4803
4804 static comp_cost
4805 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, iv_cand *cand,
4806                                 comp_cost cost)
4807 {
4808   int header_freq = data->current_loop->header->frequency;
4809   int stmt_freq = at->bb->frequency;
4810   /* With a zero header frequency COST is returned unscaled.  */
4811   if (header_freq == 0)
4812     return cost;
4813
4814   gcc_assert (cost.scratch <= cost.cost);
4815   int scaled_cost
4816     = cost.scratch + (cost.cost - cost.scratch) * stmt_freq / header_freq;
4817
4818   if (dump_file && (dump_flags & TDF_DETAILS))
4819     fprintf (dump_file, "Scaling iv_use based on cand %d "
4820              "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4821              cand->id, 1.0f * stmt_freq / header_freq, cost.cost,
4822              cost.scratch, scaled_cost, stmt_freq, header_freq);
4823
4824   cost.cost = scaled_cost;
4825   return cost;
4826 }
4827
4828 /* Determines the cost of the computation by that USE is expressed from
4829    induction variable CAND; infinite_cost if USE cannot be based on CAND.
4830    If ADDRESS_P is true, we just need to create an address from it, otherwise
4831    we want to get it into register.  A set of invariants we depend on is
4832    stored in DEPENDS_ON, unless a loop invariant expression is recorded in
4833    *INV_EXPR for them instead.  AT is the statement at that the value is
4834    computed.  If CAN_AUTOINC is nonnull, record there if autoinc is likely.  */
4835
4836 static comp_cost
4837 get_computation_cost_at (struct ivopts_data *data,
4838                          struct iv_use *use, struct iv_cand *cand,
4839                          bool address_p, bitmap *depends_on, gimple *at,
4840                          bool *can_autoinc,
4841                          iv_inv_expr_ent **inv_expr)
4842 {
4843   tree ubase = use->iv->base, ustep = use->iv->step;
4844   tree cbase, cstep;
4845   tree utype = TREE_TYPE (ubase), ctype;
4846   unsigned HOST_WIDE_INT cstepi, offset = 0;
4847   HOST_WIDE_INT ratio, aratio;
4848   bool var_present, symbol_present, stmt_is_after_inc;
4849   comp_cost cost;
4850   widest_int rat;
4851   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4852   machine_mode mem_mode = (address_p
4853                            ? TYPE_MODE (TREE_TYPE (*use->op_p))
4854                            : VOIDmode);
4855
4856   if (depends_on)
4857     *depends_on = NULL;
4858
4859   /* Only consider real candidates.  */
4860   if (!cand->iv)
4861     return infinite_cost;
4862
4863   cbase = cand->iv->base;
4864   cstep = cand->iv->step;
4865   ctype = TREE_TYPE (cbase);
4866
4867   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4868     {
4869       /* We do not have a precision to express the values of use.  */
4870       return infinite_cost;
4871     }
4872
4873   if (address_p
4874       || (use->iv->base_object
4875           && cand->iv->base_object
4876           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4877           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4878     {
4879       /* Do not try to express address of an object with computation based
4880          on address of a different object.  This may cause problems in rtl
4881          level alias analysis (that does not expect this to be happening,
4882          as this is illegal in C), and would be unlikely to be useful
4883          anyway.  */
4884       if (use->iv->base_object
4885           && cand->iv->base_object
4886           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4887         return infinite_cost;
4888     }
4889
4890   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4891     {
4892       /* TODO -- add direct handling of this case.  */
4893       goto fallback;
4894     }
4895
4896   /* CSTEPI is removed from the offset in case statement is after the
4897      increment.  If the step is not constant, we use zero instead.
4898      This is a bit imprecise (there is the extra addition), but
4899      redundancy elimination is likely to transform the code so that
4900      it uses value of the variable before increment anyway,
4901      so it is not that much unrealistic.  */
4902   if (cst_and_fits_in_hwi (cstep))
4903     cstepi = int_cst_value (cstep);
4904   else
4905     cstepi = 0;
4906
4907   if (!constant_multiple_of (ustep, cstep, &rat))
4908     return infinite_cost;
4909
4910   if (wi::fits_shwi_p (rat))
4911     ratio = rat.to_shwi ();
4912   else
4913     return infinite_cost;
4914
4915   STRIP_NOPS (cbase);
4916   ctype = TREE_TYPE (cbase);
4917
4918   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4919
4920   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4921      or ratio == 1, it is better to handle this like
4922
4923      ubase - ratio * cbase + ratio * var
4924
4925      (also holds in the case ratio == -1, TODO).  */
4926
4927   if (cst_and_fits_in_hwi (cbase))
4928     {
4929       offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4930       cost = difference_cost (data,
4931                               ubase, build_int_cst (utype, 0),
4932                               &symbol_present, &var_present, &offset,
4933                               depends_on);
4934       cost /= avg_loop_niter (data->current_loop);
4935     }
4936   else if (ratio == 1)
4937     {
4938       tree real_cbase = cbase;
4939
4940       /* Check to see if any adjustment is needed.  */
4941       if (cstepi == 0 && stmt_is_after_inc)
4942         {
4943           aff_tree real_cbase_aff;
4944           aff_tree cstep_aff;
4945
4946           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4947                                    &real_cbase_aff);
4948           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4949
4950           aff_combination_add (&real_cbase_aff, &cstep_aff);
4951           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4952         }
4953
4954       cost = difference_cost (data,
4955                               ubase, real_cbase,
4956                               &symbol_present, &var_present, &offset,
4957                               depends_on);
4958       cost /= avg_loop_niter (data->current_loop);
4959     }
4960   else if (address_p
4961            && !POINTER_TYPE_P (ctype)
4962            && multiplier_allowed_in_address_p
4963                 (ratio, mem_mode,
4964                  TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4965     {
4966       tree real_cbase = cbase;
4967
4968       /* Check to see if any adjustment is needed.  CTYPE is never a
4969          pointer type here (the condition of this branch requires
4970          !POINTER_TYPE_P (ctype)), so PLUS_EXPR, not POINTER_PLUS_EXPR,
4971          is the code to use for the adjustment.  */
4972       if (cstepi == 0 && stmt_is_after_inc)
4973         real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4974
4975       real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase,
4976                                 build_int_cst (ctype, ratio));
4977       cost = difference_cost (data,
4978                               ubase, real_cbase,
4979                               &symbol_present, &var_present, &offset,
4980                               depends_on);
4981       cost /= avg_loop_niter (data->current_loop);
4982     }
4983   else
4984     {
4985       cost = force_var_cost (data, cbase, depends_on);
4986       cost += difference_cost (data, ubase, build_int_cst (utype, 0),
4987                                &symbol_present, &var_present, &offset,
4988                                depends_on);
4989       cost /= avg_loop_niter (data->current_loop);
4990       cost += add_cost (data->speed, TYPE_MODE (ctype));
4991     }
4992
4993   /* Record setup cost in scratch field.  */
4994   cost.scratch = cost.cost;
4995
4996   if (inv_expr && depends_on && *depends_on)
4997     {
4998       *inv_expr = get_loop_invariant_expr (data, ubase, cbase, ratio,
4999                                            address_p);
5000       /* Clear depends on.  */
5001       if (*inv_expr != NULL)
5002         bitmap_clear (*depends_on);
5003     }
5004
5005   /* If we are after the increment, the value of the candidate is higher by
5006      one iteration.  */
5007   if (stmt_is_after_inc)
5008     offset -= ratio * cstepi;
5009
5010   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
5011      (symbol/var1/const parts may be omitted).  If we are looking for an
5012      address, find the cost of addressing this.  */
5013   if (address_p)
5014     {
5015       cost += get_address_cost (symbol_present, var_present,
5016                                 offset, ratio, cstepi,
5017                                 mem_mode,
5018                                 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
5019                                 speed, stmt_is_after_inc, can_autoinc);
5020       return get_scaled_computation_cost_at (data, at, cand, cost);
5021     }
5022
5023   /* Otherwise estimate the costs for computing the expression.  */
5024   if (!symbol_present && !var_present && !offset)
5025     {
5026       if (ratio != 1)
5027         cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
5028       return get_scaled_computation_cost_at (data, at, cand, cost);
5029     }
5030
5031   /* Symbol + offset should be compile-time computable so consider that they
5032      are added once to the variable, if present.  */
5033   if (var_present && (symbol_present || offset))
5034     cost += adjust_setup_cost (data,
5035                                add_cost (speed, TYPE_MODE (ctype)));
5036
5037   /* Having offset does not affect runtime cost in case it is added to
5038      symbol, but it increases complexity.  */
5039   if (offset)
5040     cost.complexity++;
5041
5042   cost += add_cost (speed, TYPE_MODE (ctype));
5043
5044   aratio = ratio > 0 ? ratio : -ratio;
5045   if (aratio != 1)
5046     cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
5047
5048   return get_scaled_computation_cost_at (data, at, cand, cost);
5049
5050 fallback:
5051   if (can_autoinc)
5052     *can_autoinc = false;
5053
5054   /* Just get the expression, expand it and measure the cost.  */
5055   tree comp = get_computation_at (data->current_loop, use, cand, at);
5056
5057   if (!comp)
5058     return infinite_cost;
5059
5060   if (address_p)
5061     comp = build_simple_mem_ref (comp);
5062
5063   cost = comp_cost (computation_cost (comp, speed), 0);
5064
5065   return get_scaled_computation_cost_at (data, at, cand, cost);
5066 }
5067
5068 /* Determines the cost of the computation by that USE is expressed from
5069    induction variable CAND, with the value computed at the statement of USE
5070    itself.  If ADDRESS_P is true, we just need to create an address from
5071    it, otherwise we want to get it into register.  A set of invariants we
5072    depend on is stored in DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to
5073    record whether autoinc addressing is likely.  */
5074
5075 static comp_cost
5076 get_computation_cost (struct ivopts_data *data,
5077                       struct iv_use *use, struct iv_cand *cand,
5078                       bool address_p, bitmap *depends_on,
5079                       bool *can_autoinc, iv_inv_expr_ent **inv_expr)
5080 {
5081   gimple *use_stmt = use->stmt;
5082   return get_computation_cost_at (data, use, cand, address_p, depends_on,
5083                                   use_stmt, can_autoinc, inv_expr);
5084 }
5085
5086 /* Determines the cost of computing the first use in GROUP with CAND in a
5087    generic expression and records it.  Returns false if it is infinite.  */
5088
5089 static bool
5090 determine_group_iv_cost_generic (struct ivopts_data *data,
5091                                  struct iv_group *group, struct iv_cand *cand)
5092 {
5093   struct iv_use *first_use = group->vuses[0];
5094   iv_inv_expr_ent *inv_expr = NULL;
5095   bitmap depends_on = NULL;
5096   comp_cost cost;
5097
5098   /* The preserved original biv is free at the statement incrementing it;
5099      the increment is already counted in the cost of the candidate.  */
5100   bool at_original_increment
5101     = cand->pos == IP_ORIGINAL && cand->incremented_at == first_use->stmt;
5102   if (!at_original_increment)
5103     cost = get_computation_cost (data, first_use, cand, false,
5104                                  &depends_on, NULL, &inv_expr);
5105   else
5106     cost = no_cost;
5107
5108   set_group_iv_cost (data, group, cand, cost, depends_on,
5109                      NULL_TREE, ERROR_MARK, inv_expr);
5110   return !cost.infinite_cost_p ();
5111 }
5112
5113 /* Determines cost of computing all uses in GROUP with CAND in addresses.  */
5114
5115 static bool
5116 determine_group_iv_cost_address (struct ivopts_data *data,
5117                                  struct iv_group *group, struct iv_cand *cand)
5118 {
5119   struct iv_use *first_use = group->vuses[0];
5120   iv_inv_expr_ent *inv_expr = NULL;
5121   bitmap depends_on;
5122   bool can_autoinc;
5123   comp_cost sum_cost = no_cost, cost;
5124
5125   /* The first use of the group also provides DEPENDS_ON and INV_EXPR.  */
5126   cost = get_computation_cost (data, first_use, cand, true,
5127                                &depends_on, &can_autoinc, &inv_expr);
5128   sum_cost = cost;
5129
5130   if (!sum_cost.infinite_cost_p () && cand->ainc_use == first_use)
5131     {
5132       /* If we generated the candidate solely for exploiting autoincrement
5133          opportunities, and it turns out it can't be used, set the cost to
5134          infinity to make sure we ignore it.  */
5135       if (can_autoinc)
5136         sum_cost -= cand->cost_step;
5137       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5138         sum_cost = infinite_cost;
5139     }
5140
5141   /* Uses in a group can share setup code, so remove the setup (scratch)
5142      part from COST.  NOTE(review): COST is reassigned in the loop below
5143      before it is read again -- confirm this subtraction has any effect.  */
5144   cost -= cost.scratch;
5145
5146   /* Compute and add costs for the rest of the uses of this group; the cost
5147      depends on the offset and on where each iv_use is.  */
5148   for (unsigned idx = 1;
5149        idx < group->vuses.length () && !sum_cost.infinite_cost_p (); idx++)
5150     {
5151       struct iv_use *sub_use = group->vuses[idx];
5152       cost = get_computation_cost (data, sub_use, cand, true,
5153                                    NULL, &can_autoinc, NULL);
5154       sum_cost += cost;
5155     }
5156   set_group_iv_cost (data, group, cand, sum_cost, depends_on,
5157                      NULL_TREE, ERROR_MARK, inv_expr);
5158   return !sum_cost.infinite_cost_p ();
5159 }
5160
5161 /* Stores to VAL the value of candidate CAND at statement AT in iteration
5162    NITER of LOOP: BASE + NITER * STEP, plus STEP if AT is after the increment.  */
5163
5164 static void
5165 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5166                aff_tree *val)
5167 {
5168   struct iv *civ = cand->iv;
5169   tree base_type = TREE_TYPE (civ->base);
5170   bool pointer_p = POINTER_TYPE_P (base_type);
5171   aff_tree step_aff, niter_aff, advance;
5172
5173   /* The advance is computed in sizetype for pointers, else in unsigned type.  */
5174   tree calc_type = pointer_p ? sizetype : unsigned_type_for (base_type);
5175
5176   tree_to_aff_combination (civ->step, TREE_TYPE (civ->step), &step_aff);
5177   aff_combination_convert (&step_aff, calc_type);
5178   tree_to_aff_combination (niter, TREE_TYPE (niter), &niter_aff);
5179   aff_combination_convert (&niter_aff, calc_type);
5180   aff_combination_mult (&niter_aff, &step_aff, &advance);
5181   if (stmt_after_increment (loop, cand, at))
5182     aff_combination_add (&advance, &step_aff);
5183
5184   /* VAL = BASE + ADVANCE; only a non-pointer BASE is converted first.  */
5185   tree_to_aff_combination (civ->base, base_type, val);
5186   if (!pointer_p)
5187     aff_combination_convert (val, calc_type);
5188   aff_combination_add (val, &advance);
5189 }
5190
5191 /* Returns the period of induction variable IV, whose step must be an
5192    INTEGER_CST.  The result is built in the unsigned variant of the type
5193    of the step.  */
5194
5195 static tree
5196 iv_period (struct iv *iv)
5197 {
5198   tree iv_step = iv->step;
5199
5200   gcc_assert (iv_step && TREE_CODE (iv_step) == INTEGER_CST);
5201
5202   tree utype = unsigned_type_for (TREE_TYPE (iv_step));
5203
5204   /* Period of the iv is lcm (step, type_range) / step - 1, i.e.,
5205      N * type_range / step - 1.  Since type_range is a power of two,
5206      N == step >> num_of_ending_zeros_binary (step), so the final
5207      result is
5208
5209        (type_range >> num_of_ending_zeros_binary (step)) - 1
5210
5211      i.e. a mask of the PRECISION - num_ending_zeros (step) low bits.  */
5212   tree trailing_zeros = num_ending_zeros (iv_step);
5213
5214   return build_low_bits_mask (utype,
5215                               TYPE_PRECISION (utype)
5216                               - tree_to_uhwi (trailing_zeros));
5217 }
5218
5219 /* Returns the comparison operator used when eliminating the iv USE: EQ_EXPR
5220    if the exit edge of the condition is its true edge, NE_EXPR otherwise.  */
5221
5222 static enum tree_code
5223 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5224 {
5225   basic_block cond_bb = gimple_bb (use->stmt);
5226   edge exit_edge = EDGE_SUCC (cond_bb, 0);
5227   /* If the first successor stays inside the loop, take the second one.  */
5228   if (flow_bb_inside_loop_p (data->current_loop, exit_edge->dest))
5229     exit_edge = EDGE_SUCC (cond_bb, 1);
5230
5231   if (exit_edge->flags & EDGE_TRUE_VALUE)
5232     return EQ_EXPR;
5233   return NE_EXPR;
5234 }
5235
5236 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
5237    only the situation BASE = SOMETHING + OFFSET is detected, where the
5238    calculation is performed in a non-wrapping type.
5239
5240    TODO: More generally, we could test for the situation that
5241          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5242          This would require knowing the sign of OFFSET.  */
5243
5244 static bool
5245 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5246 {
5247   enum tree_code code;
5248   tree lhs_op, rhs_op;
5249   aff_tree lhs_aff, rhs_aff, neg_offset;
5250
5251   if (!nowrap_type_p (TREE_TYPE (base)))
5252     return false;
5253
5254   base = expand_simple_operations (base);
5255
5256   /* Fetch the binary operation computing BASE, either from the defining
5257      statement of an SSA name or from the expression itself.  */
5258   if (TREE_CODE (base) != SSA_NAME)
5259     {
5260       code = TREE_CODE (base);
5261       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5262         return false;
5263
5264       lhs_op = TREE_OPERAND (base, 0);
5265       rhs_op = TREE_OPERAND (base, 1);
5266     }
5267   else
5268     {
5269       gimple *def = SSA_NAME_DEF_STMT (base);
5270       if (gimple_code (def) != GIMPLE_ASSIGN)
5271         return false;
5272
5273       code = gimple_assign_rhs_code (def);
5274       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5275         return false;
5276
5277       lhs_op = gimple_assign_rhs1 (def);
5278       rhs_op = gimple_assign_rhs2 (def);
5279     }
5280
5281   /* Use affine expansion as deeper inspection to prove the equality.  */
5282   tree_to_aff_combination_expand (rhs_op, TREE_TYPE (rhs_op),
5283                                   &rhs_aff, &data->name_expansion_cache);
5284   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5285                                   &neg_offset, &data->name_expansion_cache);
5286   aff_combination_scale (&neg_offset, -1);
5287
5288   if (code != PLUS_EXPR && code != POINTER_PLUS_EXPR)
5289     return false;
5290
5291   /* The second operand equals OFFSET if their difference is zero.  */
5292   aff_combination_add (&rhs_aff, &neg_offset);
5293   if (aff_combination_zero_p (&rhs_aff))
5294     return true;
5295
5296   /* Only for PLUS_EXPR is the first operand tried as well.  */
5297   if (code != PLUS_EXPR)
5298     return false;
5299
5300   tree_to_aff_combination_expand (lhs_op, TREE_TYPE (lhs_op),
5301                                   &lhs_aff, &data->name_expansion_cache);
5302   aff_combination_add (&lhs_aff, &neg_offset);
5303   return aff_combination_zero_p (&lhs_aff);
5304 }
5305
5306 /* Tries to replace the loop exit test by one formulated in terms of an
5307    ordering comparison with CAND.  NITER describes the number of iterations
5308    of the loop.  If successful, the comparison in COMP_P is altered accordingly.
5309
5310    We aim to handle the following situation:
5311
5312    sometype *base, *p;
5313    int a, b, i;
5314
5315    i = a;
5316    p = p_0 = base + a;
5317
5318    do
5319      {
5320        bla (*p);
5321        p++;
5322        i++;
5323      }
5324    while (i < b);
5325
5326    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5327    We aim to optimize this to
5328
5329    p = p_0 = base + a;
5330    do
5331      {
5332        bla (*p);
5333        p++;
5334      }
5335    while (p < p_0 - a + b);
5336
5337    This preserves correctness, since the pointer arithmetic does not
5338    overflow.  More precisely:
5339
5340    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5341       overflow in computing it or the values of p.
5342    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5343       overflow.  To prove this, we use the fact that p_0 = base + a.  */
5344
5345 static bool
5346 iv_elimination_compare_lt (struct ivopts_data *data,
5347                            struct iv_cand *cand, enum tree_code *comp_p,
5348                            struct tree_niter_desc *niter)
5349 {
5350   tree niter_type = TREE_TYPE (niter->niter);
5351   tree a, b, sum;
5352
5353   /* We need to know that the candidate induction variable does not overflow.
5354      While more complex analysis may be used to prove this, for now just
5355      check that the variable appears in the original program and that it
5356      is computed in a type that guarantees no overflows.  */
5357   tree cand_type = TREE_TYPE (cand->iv->base);
5358   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5359     return false;
5360
5361   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5362      the calculation of the BOUND could overflow, making the comparison
5363      invalid.  */
5364   if (!data->loop_single_exit_p)
5365     return false;
5366
5367   /* We need to be able to decide whether candidate is increasing or decreasing
5368      in order to choose the right comparison operator.  */
5369   if (!cst_and_fits_in_hwi (cand->iv->step))
5370     return false;
5371   HOST_WIDE_INT step = int_cst_value (cand->iv->step);
5372
5373   /* Check that the number of iterations matches the expected pattern:
5374      a + 1 > b ? 0 : b - a - 1.  First find which operand of the
5375      may-be-zero condition is supposed to be the sum a + 1.  */
5376   tree mbz = niter->may_be_zero;
5377   switch (TREE_CODE (mbz))
5378     {
5379     case GT_EXPR:
5380       /* Handle a + 1 > b.  */
5381       sum = TREE_OPERAND (mbz, 0);
5382       b = TREE_OPERAND (mbz, 1);
5383       break;
5384
5385     case LT_EXPR:
5386       /* Handle b < a + 1.  */
5387       sum = TREE_OPERAND (mbz, 1);
5388       b = TREE_OPERAND (mbz, 0);
5389       break;
5390
5391     default:
5392       return false;
5393     }
5394
5395   if (TREE_CODE (sum) != PLUS_EXPR || !integer_onep (TREE_OPERAND (sum, 1)))
5396     return false;
5397   a = TREE_OPERAND (sum, 0);
5398
5399   /* Expected number of iterations is B - A - 1.  Check that it matches
5400      the actual number, i.e., that B - A - NITER = 1.  */
5401   struct aff_tree niter_aff, a_aff, diff;
5402   tree_to_aff_combination (niter->niter, niter_type, &niter_aff);
5403   tree_to_aff_combination (fold_convert (niter_type, a), niter_type, &a_aff);
5404   tree_to_aff_combination (fold_convert (niter_type, b), niter_type, &diff);
5405   aff_combination_scale (&niter_aff, -1);
5406   aff_combination_scale (&a_aff, -1);
5407   aff_combination_add (&diff, &a_aff);
5408   aff_combination_add (&diff, &niter_aff);
5409   if (diff.n != 0 || diff.offset != 1)
5410     return false;
5411
5412   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5413      overflow.  */
5414   tree step_type = TREE_TYPE (cand->iv->step);
5415   tree offset = fold_build2 (MULT_EXPR, step_type, cand->iv->step,
5416                              fold_convert (step_type, a));
5417   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5418     return false;
5419
5420   /* Determine the new comparison operator from the sign of the step.  An
5421      exit test using EQ_EXPR gets the inverse of that comparison.  */
5422   enum tree_code comp = step < 0 ? GT_EXPR : LT_EXPR;
5423   switch (*comp_p)
5424     {
5425     case NE_EXPR:
5426       *comp_p = comp;
5427       break;
5428     case EQ_EXPR:
5429       *comp_p = invert_tree_comparison (comp, false);
5430       break;
5431     default:
5432       gcc_unreachable ();
5433     }
5434
5435   return true;
5436 }
5437
5438 /* Check whether it is possible to express the condition in USE by comparison
5439    of candidate CAND.  If so, return true with the value to compare with in
5440    BOUND and the comparison operator in COMP (both may be set on failure).  */
5441
5442 static bool
5443 may_eliminate_iv (struct ivopts_data *data,
5444                   struct iv_use *use, struct iv_cand *cand, tree *bound,
5445                   enum tree_code *comp)
5446 {
5447   basic_block ex_bb;
5448   edge exit;
5449   tree period;
5450   struct loop *loop = data->current_loop;
5451   aff_tree bnd;
5452   struct tree_niter_desc *desc = NULL;
5453
5454   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5455     return false;
5456
5457   /* For now works only for exits that dominate the loop latch.
5458      TODO: extend to other conditions inside loop body.  */
5459   ex_bb = gimple_bb (use->stmt);
5460   if (use->stmt != last_stmt (ex_bb)
5461       || gimple_code (use->stmt) != GIMPLE_COND
5462       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5463     return false;
5464
5465   exit = EDGE_SUCC (ex_bb, 0);  /* Pick the successor leaving the loop.  */
5466   if (flow_bb_inside_loop_p (loop, exit->dest))
5467     exit = EDGE_SUCC (ex_bb, 1);
5468   if (flow_bb_inside_loop_p (loop, exit->dest))
5469     return false;
5470
5471   desc = niter_for_exit (data, exit);
5472   if (!desc)
5473     return false;
5474
5475   /* Determine whether we can use the variable to test the exit condition.
5476      This is the case iff the period of the induction variable is greater
5477      than the number of iterations for which the exit condition is true.  */
5478   period = iv_period (cand->iv);
5479
5480   /* If the number of iterations is constant, compare against it directly.  */
5481   if (TREE_CODE (desc->niter) == INTEGER_CST)
5482     {
5483       /* See cand_value_at.  */
5484       if (stmt_after_increment (loop, cand, use->stmt))
5485         {
5486           if (!tree_int_cst_lt (desc->niter, period))
5487             return false;
5488         }
5489       else
5490         {
5491           if (tree_int_cst_lt (period, desc->niter))
5492             return false;
5493         }
5494     }
5495
5496   /* If not, and if this is the only possible exit of the loop, see whether
5497      we can get a conservative estimate on the number of iterations of the
5498      entire loop and compare against that instead.  */
5499   else
5500     {
5501       widest_int period_value, max_niter;
5502
5503       max_niter = desc->max;
5504       if (stmt_after_increment (loop, cand, use->stmt))
5505         max_niter += 1;
5506       period_value = wi::to_widest (period);
5507       if (wi::gtu_p (max_niter, period_value))
5508         {
5509           /* See if we can take advantage of inferred loop bound
5510              information.  */
5511           if (data->loop_single_exit_p)
5512             {
5513               if (!max_loop_iterations (loop, &max_niter))
5514                 return false;
5515               /* The loop bound is already adjusted by adding 1.  */
5516               if (wi::gtu_p (max_niter, period_value))
5517                 return false;
5518             }
5519           else
5520             return false;
5521         }
5522     }
5523
5524   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5525
5526   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5527                          aff_combination_to_tree (&bnd));
5528   *comp = iv_elimination_compare (data, use);
5529
5530   /* It is unlikely that computing the number of iterations using division
5531      would be more profitable than keeping the original induction variable.  */
5532   if (expression_expensive_p (*bound))
5533     return false;
5534
5535   /* Sometimes, it is possible to handle the situation that the number of
5536      iterations may be zero unless additional assumptions hold, by using <
5537      instead of != in the exit condition.
5538
5539      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5540            base the exit condition on it.  However, that is often too
5541            expensive.  */
5542   if (!integer_zerop (desc->may_be_zero))
5543     return iv_elimination_compare_lt (data, cand, comp, desc);
5544
5545   return true;
5546 }
5547
5548 /* Calculates the cost of BOUND if, after stripping NOPs, it is the default
5549    definition of a PARM_DECL and DATA->body_includes_call: COSTS_N_INSNS (1)
5550    in that case (the PARM_DECL must be copied), zero otherwise.  */
5551
5552 static int
5553 parm_decl_cost (struct ivopts_data *data, tree bound)
5554 {
5555   tree stripped = bound;
5556   STRIP_NOPS (stripped);
5557
5558   if (TREE_CODE (stripped) != SSA_NAME
5559       || !SSA_NAME_IS_DEFAULT_DEF (stripped)
5560       || TREE_CODE (SSA_NAME_VAR (stripped)) != PARM_DECL
5561       || !data->body_includes_call)
5562     return 0;
5563   return COSTS_N_INSNS (1);
5564 }
5565
5566 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5567
5568 static bool
5569 determine_group_iv_cost_cond (struct ivopts_data *data,
5570                               struct iv_group *group, struct iv_cand *cand)
5571 {
5572   tree bound = NULL_TREE;
5573   struct iv *cmp_iv;
5574   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5575   comp_cost elim_cost, express_cost, cost, bound_cost;
5576   bool ok;
5577   iv_inv_expr_ent *elim_inv_expr = NULL, *express_inv_expr = NULL, *inv_expr;
5578   tree *control_var, *bound_cst;
5579   enum tree_code comp = ERROR_MARK;
5580   struct iv_use *use = group->vuses[0];
5581
5582   gcc_assert (cand->iv);
5583
5584   /* Try iv elimination.  */
5585   if (may_eliminate_iv (data, use, cand, &bound, &comp))
5586     {
5587       elim_cost = force_var_cost (data, bound, &depends_on_elim);
5588       if (elim_cost.cost == 0)
5589         elim_cost.cost = parm_decl_cost (data, bound);
5590       else if (TREE_CODE (bound) == INTEGER_CST)
5591         elim_cost.cost = 0;
5592       /* If we replace a loop condition 'i < n' with 'p < base + n',
5593          depends_on_elim will have 'base' and 'n' set, which implies
5594          that both 'base' and 'n' will be live during the loop.  More likely,
5595          'base + n' will be loop invariant, resulting in only one live value
5596          during the loop.  So in that case we clear depends_on_elim and set
5597         elim_inv_expr_id instead.  */
5598       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5599         {
5600           elim_inv_expr = record_inv_expr (data, bound);
5601           bitmap_clear (depends_on_elim);
5602         }
5603       /* The bound is a loop invariant, so it will be only computed
5604          once.  */
5605       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5606     }
5607   else
5608     elim_cost = infinite_cost;
5609
5610   /* Try expressing the original giv.  If it is compared with an invariant,
5611      note that we cannot get rid of it.  */
5612   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5613                               NULL, &cmp_iv);
5614   gcc_assert (ok);
5615
5616   /* When the condition is a comparison of the candidate IV against
5617      zero, prefer this IV.
5618
5619      TODO: The constant that we're subtracting from the cost should
5620      be target-dependent.  This information should be added to the
5621      target costs for each backend.  */
5622   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5623       && integer_zerop (*bound_cst)
5624       && (operand_equal_p (*control_var, cand->var_after, 0)
5625           || operand_equal_p (*control_var, cand->var_before, 0)))
5626     elim_cost -= 1;
5627
5628   express_cost = get_computation_cost (data, use, cand, false,
5629                                        &depends_on_express, NULL,
5630                                        &express_inv_expr);
5631   fd_ivopts_data = data;
5632   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5633
5634   /* Count the cost of the original bound as well.  */
5635   bound_cost = force_var_cost (data, *bound_cst, NULL);
5636   if (bound_cost.cost == 0)
5637     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5638   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5639     bound_cost.cost = 0;
5640   express_cost += bound_cost;
5641
5642   /* Choose the better approach, preferring the eliminated IV. */
5643   if (elim_cost <= express_cost)
5644     {
5645       cost = elim_cost;
5646       depends_on = depends_on_elim;
5647       depends_on_elim = NULL;
5648       inv_expr = elim_inv_expr;
5649     }
5650   else
5651     {
5652       cost = express_cost;
5653       depends_on = depends_on_express;
5654       depends_on_express = NULL;
5655       bound = NULL_TREE;
5656       comp = ERROR_MARK;
5657       inv_expr = express_inv_expr;
5658     }
5659
5660   set_group_iv_cost (data, group, cand, cost,
5661                      depends_on, bound, comp, inv_expr);
5662
5663   if (depends_on_elim)
5664     BITMAP_FREE (depends_on_elim);
5665   if (depends_on_express)
5666     BITMAP_FREE (depends_on_express);
5667
5668   return !cost.infinite_cost_p ();
5669 }
5670
5671 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5672    if USE cannot be represented with CAND.  */
5673
5674 static bool
5675 determine_group_iv_cost (struct ivopts_data *data,
5676                          struct iv_group *group, struct iv_cand *cand)
5677 {
5678   switch (group->type)
5679     {
5680     case USE_NONLINEAR_EXPR:
5681       return determine_group_iv_cost_generic (data, group, cand);
5682
5683     case USE_ADDRESS:
5684       return determine_group_iv_cost_address (data, group, cand);
5685
5686     case USE_COMPARE:
5687       return determine_group_iv_cost_cond (data, group, cand);
5688
5689     default:
5690       gcc_unreachable ();
5691     }
5692 }
5693
5694 /* Return true if get_computation_cost indicates that autoincrement is
5695    a possibility for the pair of USE and CAND, false otherwise.  */
5696
5697 static bool
5698 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5699                            struct iv_cand *cand)
5700 {
5701   bitmap depends_on;
5702   bool can_autoinc;
5703   comp_cost cost;
5704
5705   if (use->type != USE_ADDRESS)
5706     return false;
5707
5708   cost = get_computation_cost (data, use, cand, true, &depends_on,
5709                                &can_autoinc, NULL);
5710
5711   BITMAP_FREE (depends_on);
5712
5713   return !cost.infinite_cost_p () && can_autoinc;
5714 }
5715
5716 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5717    use that allows autoincrement, and set their AINC_USE if possible.  */
5718
5719 static void
5720 set_autoinc_for_original_candidates (struct ivopts_data *data)
5721 {
5722   unsigned i, j;
5723
5724   for (i = 0; i < data->vcands.length (); i++)
5725     {
5726       struct iv_cand *cand = data->vcands[i];
5727       struct iv_use *closest_before = NULL;
5728       struct iv_use *closest_after = NULL;
5729       if (cand->pos != IP_ORIGINAL)
5730         continue;
5731
5732       for (j = 0; j < data->vgroups.length (); j++)
5733         {
5734           struct iv_group *group = data->vgroups[j];
5735           struct iv_use *use = group->vuses[0];
5736           unsigned uid = gimple_uid (use->stmt);
5737
5738           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5739             continue;
5740
5741           if (uid < gimple_uid (cand->incremented_at)
5742               && (closest_before == NULL
5743                   || uid > gimple_uid (closest_before->stmt)))
5744             closest_before = use;
5745
5746           if (uid > gimple_uid (cand->incremented_at)
5747               && (closest_after == NULL
5748                   || uid < gimple_uid (closest_after->stmt)))
5749             closest_after = use;
5750         }
5751
5752       if (closest_before != NULL
5753           && autoinc_possible_for_pair (data, closest_before, cand))
5754         cand->ainc_use = closest_before;
5755       else if (closest_after != NULL
5756                && autoinc_possible_for_pair (data, closest_after, cand))
5757         cand->ainc_use = closest_after;
5758     }
5759 }
5760
5761 /* Finds the candidates for the induction variables.  */
5762
5763 static void
5764 find_iv_candidates (struct ivopts_data *data)
5765 {
5766   /* Add commonly used ivs.  */
5767   add_standard_iv_candidates (data);
5768
5769   /* Add old induction variables.  */
5770   add_iv_candidate_for_bivs (data);
5771
5772   /* Add induction variables derived from uses.  */
5773   add_iv_candidate_for_groups (data);
5774
5775   set_autoinc_for_original_candidates (data);
5776
5777   /* Record the important candidates.  */
5778   record_important_candidates (data);
5779
5780   if (dump_file && (dump_flags & TDF_DETAILS))
5781     {
5782       unsigned i;
5783
5784       fprintf (dump_file, "\n<Important Candidates>:\t");
5785       for (i = 0; i < data->vcands.length (); i++)
5786         if (data->vcands[i]->important)
5787           fprintf (dump_file, " %d,", data->vcands[i]->id);
5788       fprintf (dump_file, "\n");
5789
5790       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5791       for (i = 0; i < data->vgroups.length (); i++)
5792         {
5793           struct iv_group *group = data->vgroups[i];
5794
5795           if (group->related_cands)
5796             {
5797               fprintf (dump_file, "  Group %d:\t", group->id);
5798               dump_bitmap (dump_file, group->related_cands);
5799             }
5800         }
5801       fprintf (dump_file, "\n");
5802     }
5803 }
5804
5805 /* Determines costs of computing use of iv with an iv candidate.  */
5806
5807 static void
5808 determine_group_iv_costs (struct ivopts_data *data)
5809 {
5810   unsigned i, j;
5811   struct iv_cand *cand;
5812   struct iv_group *group;
5813   bitmap to_clear = BITMAP_ALLOC (NULL);
5814
5815   alloc_use_cost_map (data);
5816
5817   for (i = 0; i < data->vgroups.length (); i++)
5818     {
5819       group = data->vgroups[i];
5820
5821       if (data->consider_all_candidates)
5822         {
5823           for (j = 0; j < data->vcands.length (); j++)
5824             {
5825               cand = data->vcands[j];
5826               determine_group_iv_cost (data, group, cand);
5827             }
5828         }
5829       else
5830         {
5831           bitmap_iterator bi;
5832
5833           EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5834             {
5835               cand = data->vcands[j];
5836               if (!determine_group_iv_cost (data, group, cand))
5837                 bitmap_set_bit (to_clear, j);
5838             }
5839
5840           /* Remove the candidates for that the cost is infinite from
5841              the list of related candidates.  */
5842           bitmap_and_compl_into (group->related_cands, to_clear);
5843           bitmap_clear (to_clear);
5844         }
5845     }
5846
5847   BITMAP_FREE (to_clear);
5848
5849   if (dump_file && (dump_flags & TDF_DETAILS))
5850     {
5851       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5852       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5853
5854       for (hash_table<iv_inv_expr_hasher>::iterator it
5855            = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5856            ++it)
5857         list.safe_push (*it);
5858
5859       list.qsort (sort_iv_inv_expr_ent);
5860
5861       for (i = 0; i < list.length (); ++i)
5862         {
5863           fprintf (dump_file, "inv_expr %d: \t", i);
5864           print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5865           fprintf (dump_file, "\n");
5866         }
5867
5868       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5869
5870       for (i = 0; i < data->vgroups.length (); i++)
5871         {
5872           group = data->vgroups[i];
5873
5874           fprintf (dump_file, "Group %d:\n", i);
5875           fprintf (dump_file, "  cand\tcost\tcompl.\tinv.ex.\tdepends on\n");
5876           for (j = 0; j < group->n_map_members; j++)
5877             {
5878               if (!group->cost_map[j].cand
5879                   || group->cost_map[j].cost.infinite_cost_p ())
5880                 continue;
5881
5882               fprintf (dump_file, "  %d\t%d\t%d\t",
5883                        group->cost_map[j].cand->id,
5884                        group->cost_map[j].cost.cost,
5885                        group->cost_map[j].cost.complexity);
5886               if (group->cost_map[j].inv_expr != NULL)
5887                 fprintf (dump_file, "%d\t",
5888                          group->cost_map[j].inv_expr->id);
5889               else
5890                 fprintf (dump_file, "\t");
5891               if (group->cost_map[j].depends_on)
5892                 bitmap_print (dump_file,
5893                               group->cost_map[j].depends_on, "","");
5894               fprintf (dump_file, "\n");
5895             }
5896
5897           fprintf (dump_file, "\n");
5898         }
5899       fprintf (dump_file, "\n");
5900     }
5901 }
5902
5903 /* Determines cost of the candidate CAND.  */
5904
5905 static void
5906 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5907 {
5908   comp_cost cost_base;
5909   unsigned cost, cost_step;
5910   tree base;
5911
5912   if (!cand->iv)
5913     {
5914       cand->cost = 0;
5915       return;
5916     }
5917
5918   /* There are two costs associated with the candidate -- its increment
5919      and its initialization.  The second is almost negligible for any loop
5920      that rolls enough, so we take it just very little into account.  */
5921
5922   base = cand->iv->base;
5923   cost_base = force_var_cost (data, base, NULL);
5924   /* It will be exceptional that the iv register happens to be initialized with
5925      the proper value at no cost.  In general, there will at least be a regcopy
5926      or a const set.  */
5927   if (cost_base.cost == 0)
5928     cost_base.cost = COSTS_N_INSNS (1);
5929   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5930
5931   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5932
5933   /* Prefer the original ivs unless we may gain something by replacing it.
5934      The reason is to make debugging simpler; so this is not relevant for
5935      artificial ivs created by other optimization passes.  */
5936   if (cand->pos != IP_ORIGINAL
5937       || !SSA_NAME_VAR (cand->var_before)
5938       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5939     cost++;
5940
5941   /* Prefer not to insert statements into latch unless there are some
5942      already (so that we do not create unnecessary jumps).  */
5943   if (cand->pos == IP_END
5944       && empty_block_p (ip_end_pos (data->current_loop)))
5945     cost++;
5946
5947   cand->cost = cost;
5948   cand->cost_step = cost_step;
5949 }
5950
5951 /* Determines costs of computation of the candidates.  */
5952
5953 static void
5954 determine_iv_costs (struct ivopts_data *data)
5955 {
5956   unsigned i;
5957
5958   if (dump_file && (dump_flags & TDF_DETAILS))
5959     {
5960       fprintf (dump_file, "<Candidate Costs>:\n");
5961       fprintf (dump_file, "  cand\tcost\n");
5962     }
5963
5964   for (i = 0; i < data->vcands.length (); i++)
5965     {
5966       struct iv_cand *cand = data->vcands[i];
5967
5968       determine_iv_cost (data, cand);
5969
5970       if (dump_file && (dump_flags & TDF_DETAILS))
5971         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5972     }
5973
5974   if (dump_file && (dump_flags & TDF_DETAILS))
5975     fprintf (dump_file, "\n");
5976 }
5977
5978 /* Calculates cost for having SIZE induction variables.  */
5979
5980 static unsigned
5981 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5982 {
5983   /* We add size to the cost, so that we prefer eliminating ivs
5984      if possible.  */
5985   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5986                                             data->body_includes_call);
5987 }
5988
5989 /* For each size of the induction variable set determine the penalty.  */
5990
5991 static void
5992 determine_set_costs (struct ivopts_data *data)
5993 {
5994   unsigned j, n;
5995   gphi *phi;
5996   gphi_iterator psi;
5997   tree op;
5998   struct loop *loop = data->current_loop;
5999   bitmap_iterator bi;
6000
6001   if (dump_file && (dump_flags & TDF_DETAILS))
6002     {
6003       fprintf (dump_file, "<Global Costs>:\n");
6004       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
6005       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
6006       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
6007       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
6008     }
6009
6010   n = 0;
6011   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6012     {
6013       phi = psi.phi ();
6014       op = PHI_RESULT (phi);
6015
6016       if (virtual_operand_p (op))
6017         continue;
6018
6019       if (get_iv (data, op))
6020         continue;
6021
6022       n++;
6023     }
6024
6025   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6026     {
6027       struct version_info *info = ver_info (data, j);
6028
6029       if (info->inv_id && info->has_nonlin_use)
6030         n++;
6031     }
6032
6033   data->regs_used = n;
6034   if (dump_file && (dump_flags & TDF_DETAILS))
6035     fprintf (dump_file, "  regs_used %d\n", n);
6036
6037   if (dump_file && (dump_flags & TDF_DETAILS))
6038     {
6039       fprintf (dump_file, "  cost for size:\n");
6040       fprintf (dump_file, "  ivs\tcost\n");
6041       for (j = 0; j <= 2 * target_avail_regs; j++)
6042         fprintf (dump_file, "  %d\t%d\n", j,
6043                  ivopts_global_cost_for_size (data, j));
6044       fprintf (dump_file, "\n");
6045     }
6046 }
6047
6048 /* Returns true if A is a cheaper cost pair than B.  */
6049
6050 static bool
6051 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
6052 {
6053   if (!a)
6054     return false;
6055
6056   if (!b)
6057     return true;
6058
6059   if (a->cost < b->cost)
6060     return true;
6061
6062   if (b->cost < a->cost)
6063     return false;
6064
6065   /* In case the costs are the same, prefer the cheaper candidate.  */
6066   if (a->cand->cost < b->cand->cost)
6067     return true;
6068
6069   return false;
6070 }
6071
6072
6073 /* Returns candidate by that USE is expressed in IVS.  */
6074
6075 static struct cost_pair *
6076 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
6077 {
6078   return ivs->cand_for_group[group->id];
6079 }
6080
6081 /* Computes the cost field of IVS structure.  */
6082
6083 static void
6084 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
6085 {
6086   comp_cost cost = ivs->cand_use_cost;
6087
6088   cost += ivs->cand_cost;
6089
6090   cost += ivopts_global_cost_for_size (data,
6091                                        ivs->n_regs
6092                                        + ivs->used_inv_exprs->elements ());
6093
6094   ivs->cost = cost;
6095 }
6096
6097 /* Remove invariants in set INVS to set IVS.  */
6098
6099 static void
6100 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
6101 {
6102   bitmap_iterator bi;
6103   unsigned iid;
6104
6105   if (!invs)
6106     return;
6107
6108   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6109     {
6110       ivs->n_invariant_uses[iid]--;
6111       if (ivs->n_invariant_uses[iid] == 0)
6112         ivs->n_regs--;
6113     }
6114 }
6115
6116 /* Set USE not to be expressed by any candidate in IVS.  */
6117
6118 static void
6119 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6120                  struct iv_group *group)
6121 {
6122   unsigned gid = group->id, cid;
6123   struct cost_pair *cp;
6124
6125   cp = ivs->cand_for_group[gid];
6126   if (!cp)
6127     return;
6128   cid = cp->cand->id;
6129
6130   ivs->bad_groups++;
6131   ivs->cand_for_group[gid] = NULL;
6132   ivs->n_cand_uses[cid]--;
6133
6134   if (ivs->n_cand_uses[cid] == 0)
6135     {
6136       bitmap_clear_bit (ivs->cands, cid);
6137       /* Do not count the pseudocandidates.  */
6138       if (cp->cand->iv)
6139         ivs->n_regs--;
6140       ivs->n_cands--;
6141       ivs->cand_cost -= cp->cand->cost;
6142
6143       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6144     }
6145
6146   ivs->cand_use_cost -= cp->cost;
6147
6148   iv_ca_set_remove_invariants (ivs, cp->depends_on);
6149
6150   if (cp->inv_expr != NULL)
6151     {
6152       unsigned *slot = ivs->used_inv_exprs->get (cp->inv_expr);
6153       --(*slot);
6154       if (*slot == 0)
6155         ivs->used_inv_exprs->remove (cp->inv_expr);
6156     }
6157   iv_ca_recount_cost (data, ivs);
6158 }
6159
6160 /* Add invariants in set INVS to set IVS.  */
6161
6162 static void
6163 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6164 {
6165   bitmap_iterator bi;
6166   unsigned iid;
6167
6168   if (!invs)
6169     return;
6170
6171   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6172     {
6173       ivs->n_invariant_uses[iid]++;
6174       if (ivs->n_invariant_uses[iid] == 1)
6175         ivs->n_regs++;
6176     }
6177 }
6178
6179 /* Set cost pair for GROUP in set IVS to CP.  */
6180
6181 static void
6182 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6183               struct iv_group *group, struct cost_pair *cp)
6184 {
6185   unsigned gid = group->id, cid;
6186
6187   if (ivs->cand_for_group[gid] == cp)
6188     return;
6189
6190   if (ivs->cand_for_group[gid])
6191     iv_ca_set_no_cp (data, ivs, group);
6192
6193   if (cp)
6194     {
6195       cid = cp->cand->id;
6196
6197       ivs->bad_groups--;
6198       ivs->cand_for_group[gid] = cp;
6199       ivs->n_cand_uses[cid]++;
6200       if (ivs->n_cand_uses[cid] == 1)
6201         {
6202           bitmap_set_bit (ivs->cands, cid);
6203           /* Do not count the pseudocandidates.  */
6204           if (cp->cand->iv)
6205             ivs->n_regs++;
6206           ivs->n_cands++;
6207           ivs->cand_cost += cp->cand->cost;
6208
6209           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6210         }
6211
6212       ivs->cand_use_cost += cp->cost;
6213       iv_ca_set_add_invariants (ivs, cp->depends_on);
6214
6215       if (cp->inv_expr != NULL)
6216         {
6217           unsigned *slot = &ivs->used_inv_exprs->get_or_insert (cp->inv_expr);
6218           ++(*slot);
6219         }
6220       iv_ca_recount_cost (data, ivs);
6221     }
6222 }
6223
6224 /* Extend set IVS by expressing USE by some of the candidates in it
6225    if possible.  Consider all important candidates if candidates in
6226    set IVS don't give any result.  */
6227
6228 static void
6229 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
6230                struct iv_group *group)
6231 {
6232   struct cost_pair *best_cp = NULL, *cp;
6233   bitmap_iterator bi;
6234   unsigned i;
6235   struct iv_cand *cand;
6236
6237   gcc_assert (ivs->upto >= group->id);
6238   ivs->upto++;
6239   ivs->bad_groups++;
6240
6241   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6242     {
6243       cand = data->vcands[i];
6244       cp = get_group_iv_cost (data, group, cand);
6245       if (cheaper_cost_pair (cp, best_cp))
6246         best_cp = cp;
6247     }
6248
6249   if (best_cp == NULL)
6250     {
6251       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6252         {
6253           cand = data->vcands[i];
6254           cp = get_group_iv_cost (data, group, cand);
6255           if (cheaper_cost_pair (cp, best_cp))
6256             best_cp = cp;
6257         }
6258     }
6259
6260   iv_ca_set_cp (data, ivs, group, best_cp);
6261 }
6262
6263 /* Get cost for assignment IVS.  */
6264
6265 static comp_cost
6266 iv_ca_cost (struct iv_ca *ivs)
6267 {
6268   /* This was a conditional expression but it triggered a bug in
6269      Sun C 5.5.  */
6270   if (ivs->bad_groups)
6271     return infinite_cost;
6272   else
6273     return ivs->cost;
6274 }
6275
6276 /* Returns true if all dependences of CP are among invariants in IVS.  */
6277
6278 static bool
6279 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6280 {
6281   unsigned i;
6282   bitmap_iterator bi;
6283
6284   if (!cp->depends_on)
6285     return true;
6286
6287   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6288     {
6289       if (ivs->n_invariant_uses[i] == 0)
6290         return false;
6291     }
6292
6293   return true;
6294 }
6295
6296 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6297    it before NEXT.  */
6298
6299 static struct iv_ca_delta *
6300 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
6301                  struct cost_pair *new_cp, struct iv_ca_delta *next)
6302 {
6303   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6304
6305   change->group = group;
6306   change->old_cp = old_cp;
6307   change->new_cp = new_cp;
6308   change->next = next;
6309
6310   return change;
6311 }
6312
6313 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6314    are rewritten.  */
6315
6316 static struct iv_ca_delta *
6317 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6318 {
6319   struct iv_ca_delta *last;
6320
6321   if (!l2)
6322     return l1;
6323
6324   if (!l1)
6325     return l2;
6326
6327   for (last = l1; last->next; last = last->next)
6328     continue;
6329   last->next = l2;
6330
6331   return l1;
6332 }
6333
6334 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6335
6336 static struct iv_ca_delta *
6337 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6338 {
6339   struct iv_ca_delta *act, *next, *prev = NULL;
6340
6341   for (act = delta; act; act = next)
6342     {
6343       next = act->next;
6344       act->next = prev;
6345       prev = act;
6346
6347       std::swap (act->old_cp, act->new_cp);
6348     }
6349
6350   return prev;
6351 }
6352
6353 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6354    reverted instead.  */
6355
6356 static void
6357 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6358                     struct iv_ca_delta *delta, bool forward)
6359 {
6360   struct cost_pair *from, *to;
6361   struct iv_ca_delta *act;
6362
6363   if (!forward)
6364     delta = iv_ca_delta_reverse (delta);
6365
6366   for (act = delta; act; act = act->next)
6367     {
6368       from = act->old_cp;
6369       to = act->new_cp;
6370       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6371       iv_ca_set_cp (data, ivs, act->group, to);
6372     }
6373
6374   if (!forward)
6375     iv_ca_delta_reverse (delta);
6376 }
6377
6378 /* Returns true if CAND is used in IVS.  */
6379
6380 static bool
6381 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6382 {
6383   return ivs->n_cand_uses[cand->id] > 0;
6384 }
6385
6386 /* Returns number of induction variable candidates in the set IVS.  */
6387
6388 static unsigned
6389 iv_ca_n_cands (struct iv_ca *ivs)
6390 {
6391   return ivs->n_cands;
6392 }
6393
6394 /* Free the list of changes DELTA.  */
6395
6396 static void
6397 iv_ca_delta_free (struct iv_ca_delta **delta)
6398 {
6399   struct iv_ca_delta *act, *next;
6400
6401   for (act = *delta; act; act = next)
6402     {
6403       next = act->next;
6404       free (act);
6405     }
6406
6407   *delta = NULL;
6408 }
6409
6410 /* Allocates new iv candidates assignment.  */
6411
6412 static struct iv_ca *
6413 iv_ca_new (struct ivopts_data *data)
6414 {
6415   struct iv_ca *nw = XNEW (struct iv_ca);
6416
6417   nw->upto = 0;
6418   nw->bad_groups = 0;
6419   nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6420                                  data->vgroups.length ());
6421   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6422   nw->cands = BITMAP_ALLOC (NULL);
6423   nw->n_cands = 0;
6424   nw->n_regs = 0;
6425   nw->cand_use_cost = no_cost;
6426   nw->cand_cost = 0;
6427   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6428   nw->used_inv_exprs = new hash_map <iv_inv_expr_ent *, unsigned> (13);
6429   nw->cost = no_cost;
6430
6431   return nw;
6432 }
6433
6434 /* Free memory occupied by the set IVS.  */
6435
6436 static void
6437 iv_ca_free (struct iv_ca **ivs)
6438 {
6439   free ((*ivs)->cand_for_group);
6440   free ((*ivs)->n_cand_uses);
6441   BITMAP_FREE ((*ivs)->cands);
6442   free ((*ivs)->n_invariant_uses);
6443   delete ((*ivs)->used_inv_exprs);
6444   free (*ivs);
6445   *ivs = NULL;
6446 }
6447
6448 /* Dumps IVS to FILE.  */
6449
6450 static void
6451 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6452 {
6453   unsigned i;
6454   comp_cost cost = iv_ca_cost (ivs);
6455
6456   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
6457            cost.complexity);
6458   fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
6459            ivs->cand_cost, ivs->cand_use_cost.cost,
6460            ivs->cand_use_cost.complexity);
6461   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6462
6463   for (i = 0; i < ivs->upto; i++)
6464     {
6465       struct iv_group *group = data->vgroups[i];
6466       struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6467       if (cp)
6468         fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6469                  group->id, cp->cand->id, cp->cost.cost,
6470                  cp->cost.complexity);
6471       else
6472         fprintf (file, "   group:%d --> ??\n", group->id);
6473     }
6474
6475   const char *pref = "";
6476   fprintf (file, "  invariant variables: ");
6477   for (i = 1; i <= data->max_inv_id; i++)
6478     if (ivs->n_invariant_uses[i])
6479       {
6480         fprintf (file, "%s%d", pref, i);
6481         pref = ", ";
6482       }
6483
6484   pref = "";
6485   fprintf (file, "\n  invariant expressions: ");
6486   for (hash_map<iv_inv_expr_ent *, unsigned>::iterator it
6487        = ivs->used_inv_exprs->begin (); it != ivs->used_inv_exprs->end (); ++it)
6488     {
6489         fprintf (file, "%s%d", pref, (*it).first->id);
6490         pref = ", ";
6491     }
6492
6493   fprintf (file, "\n\n");
6494 }
6495
6496 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6497    new set, and store differences in DELTA.  Number of induction variables
6498    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6499    the function will try to find a solution with mimimal iv candidates.  */
6500
6501 static comp_cost
6502 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6503               struct iv_cand *cand, struct iv_ca_delta **delta,
6504               unsigned *n_ivs, bool min_ncand)
6505 {
6506   unsigned i;
6507   comp_cost cost;
6508   struct iv_group *group;
6509   struct cost_pair *old_cp, *new_cp;
6510
6511   *delta = NULL;
6512   for (i = 0; i < ivs->upto; i++)
6513     {
6514       group = data->vgroups[i];
6515       old_cp = iv_ca_cand_for_group (ivs, group);
6516
6517       if (old_cp
6518           && old_cp->cand == cand)
6519         continue;
6520
6521       new_cp = get_group_iv_cost (data, group, cand);
6522       if (!new_cp)
6523         continue;
6524
6525       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6526         continue;
6527
6528       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6529         continue;
6530
6531       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6532     }
6533
6534   iv_ca_delta_commit (data, ivs, *delta, true);
6535   cost = iv_ca_cost (ivs);
6536   if (n_ivs)
6537     *n_ivs = iv_ca_n_cands (ivs);
6538   iv_ca_delta_commit (data, ivs, *delta, false);
6539
6540   return cost;
6541 }
6542
6543 /* Try narrowing set IVS by removing CAND.  Return the cost of
6544    the new set and store the differences in DELTA.  START is
6545    the candidate with which we start narrowing.  */
6546
6547 static comp_cost
6548 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6549               struct iv_cand *cand, struct iv_cand *start,
6550               struct iv_ca_delta **delta)
6551 {
6552   unsigned i, ci;
6553   struct iv_group *group;
6554   struct cost_pair *old_cp, *new_cp, *cp;
6555   bitmap_iterator bi;
6556   struct iv_cand *cnd;
6557   comp_cost cost, best_cost, acost;
6558
6559   *delta = NULL;
6560   for (i = 0; i < data->vgroups.length (); i++)
6561     {
6562       group = data->vgroups[i];
6563
6564       old_cp = iv_ca_cand_for_group (ivs, group);
6565       if (old_cp->cand != cand)
6566         continue;
6567
6568       best_cost = iv_ca_cost (ivs);
6569       /* Start narrowing with START.  */
6570       new_cp = get_group_iv_cost (data, group, start);
6571
6572       if (data->consider_all_candidates)
6573         {
6574           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6575             {
6576               if (ci == cand->id || (start && ci == start->id))
6577                 continue;
6578
6579               cnd = data->vcands[ci];
6580
6581               cp = get_group_iv_cost (data, group, cnd);
6582               if (!cp)
6583                 continue;
6584
6585               iv_ca_set_cp (data, ivs, group, cp);
6586               acost = iv_ca_cost (ivs);
6587
6588               if (acost < best_cost)
6589                 {
6590                   best_cost = acost;
6591                   new_cp = cp;
6592                 }
6593             }
6594         }
6595       else
6596         {
6597           EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6598             {
6599               if (ci == cand->id || (start && ci == start->id))
6600                 continue;
6601
6602               cnd = data->vcands[ci];
6603
6604               cp = get_group_iv_cost (data, group, cnd);
6605               if (!cp)
6606                 continue;
6607
6608               iv_ca_set_cp (data, ivs, group, cp);
6609               acost = iv_ca_cost (ivs);
6610
6611               if (acost < best_cost)
6612                 {
6613                   best_cost = acost;
6614                   new_cp = cp;
6615                 }
6616             }
6617         }
6618       /* Restore to old cp for use.  */
6619       iv_ca_set_cp (data, ivs, group, old_cp);
6620
6621       if (!new_cp)
6622         {
6623           iv_ca_delta_free (delta);
6624           return infinite_cost;
6625         }
6626
6627       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6628     }
6629
6630   iv_ca_delta_commit (data, ivs, *delta, true);
6631   cost = iv_ca_cost (ivs);
6632   iv_ca_delta_commit (data, ivs, *delta, false);
6633
6634   return cost;
6635 }
6636
6637 /* Try optimizing the set of candidates IVS by removing candidates different
6638    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6639    differences in DELTA.  */
6640
6641 static comp_cost
6642 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6643              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6644 {
6645   bitmap_iterator bi;
6646   struct iv_ca_delta *act_delta, *best_delta;
6647   unsigned i;
6648   comp_cost best_cost, acost;
6649   struct iv_cand *cand;
6650
6651   best_delta = NULL;
6652   best_cost = iv_ca_cost (ivs);
6653
6654   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6655     {
6656       cand = data->vcands[i];
6657
6658       if (cand == except_cand)
6659         continue;
6660
6661       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6662
6663       if (acost < best_cost)
6664         {
6665           best_cost = acost;
6666           iv_ca_delta_free (&best_delta);
6667           best_delta = act_delta;
6668         }
6669       else
6670         iv_ca_delta_free (&act_delta);
6671     }
6672
6673   if (!best_delta)
6674     {
6675       *delta = NULL;
6676       return best_cost;
6677     }
6678
6679   /* Recurse to possibly remove other unnecessary ivs.  */
6680   iv_ca_delta_commit (data, ivs, best_delta, true);
6681   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6682   iv_ca_delta_commit (data, ivs, best_delta, false);
6683   *delta = iv_ca_delta_join (best_delta, *delta);
6684   return best_cost;
6685 }
6686
6687 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6688    cheaper local cost for GROUP than BEST_CP.  Return pointer to
6689    the corresponding cost_pair, otherwise just return BEST_CP.  */
6690
6691 static struct cost_pair*
6692 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6693                         unsigned int cand_idx, struct iv_cand *old_cand,
6694                         struct cost_pair *best_cp)
6695 {
6696   struct iv_cand *cand;
6697   struct cost_pair *cp;
6698
6699   gcc_assert (old_cand != NULL && best_cp != NULL);
6700   if (cand_idx == old_cand->id)
6701     return best_cp;
6702
6703   cand = data->vcands[cand_idx];
6704   cp = get_group_iv_cost (data, group, cand);
6705   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6706     return cp;
6707
6708   return best_cp;
6709 }
6710
6711 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6712    which are used by more than one iv uses.  For each of those candidates,
6713    this function tries to represent iv uses under that candidate using
6714    other ones with lower local cost, then tries to prune the new set.
6715    If the new set has lower cost, It returns the new cost after recording
6716    candidate replacement in list DELTA.  */
6717
6718 static comp_cost
6719 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6720                struct iv_ca_delta **delta)
6721 {
6722   bitmap_iterator bi, bj;
6723   unsigned int i, j, k;
6724   struct iv_cand *cand;
6725   comp_cost orig_cost, acost;
6726   struct iv_ca_delta *act_delta, *tmp_delta;
6727   struct cost_pair *old_cp, *best_cp = NULL;
6728
6729   *delta = NULL;
6730   orig_cost = iv_ca_cost (ivs);
6731
6732   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6733     {
6734       if (ivs->n_cand_uses[i] == 1
6735           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6736         continue;
6737
6738       cand = data->vcands[i];
6739
6740       act_delta = NULL;
6741       /*  Represent uses under current candidate using other ones with
6742           lower local cost.  */
6743       for (j = 0; j < ivs->upto; j++)
6744         {
6745           struct iv_group *group = data->vgroups[j];
6746           old_cp = iv_ca_cand_for_group (ivs, group);
6747
6748           if (old_cp->cand != cand)
6749             continue;
6750
6751           best_cp = old_cp;
6752           if (data->consider_all_candidates)
6753             for (k = 0; k < data->vcands.length (); k++)
6754               best_cp = cheaper_cost_with_cand (data, group, k,
6755                                                 old_cp->cand, best_cp);
6756           else
6757             EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6758               best_cp = cheaper_cost_with_cand (data, group, k,
6759                                                 old_cp->cand, best_cp);
6760
6761           if (best_cp == old_cp)
6762             continue;
6763
6764           act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6765         }
6766       /* No need for further prune.  */
6767       if (!act_delta)
6768         continue;
6769
6770       /* Prune the new candidate set.  */
6771       iv_ca_delta_commit (data, ivs, act_delta, true);
6772       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6773       iv_ca_delta_commit (data, ivs, act_delta, false);
6774       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6775
6776       if (acost < orig_cost)
6777         {
6778           *delta = act_delta;
6779           return acost;
6780         }
6781       else
6782         iv_ca_delta_free (&act_delta);
6783     }
6784
6785   return orig_cost;
6786 }
6787
6788 /* Tries to extend the sets IVS in the best possible way in order to
6789    express the GROUP.  If ORIGINALP is true, prefer candidates from
6790    the original set of IVs, otherwise favor important candidates not
6791    based on any memory object.  */
6792
6793 static bool
6794 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6795                   struct iv_group *group, bool originalp)
6796 {
6797   comp_cost best_cost, act_cost;
6798   unsigned i;
6799   bitmap_iterator bi;
6800   struct iv_cand *cand;
6801   struct iv_ca_delta *best_delta = NULL, *act_delta;
6802   struct cost_pair *cp;
6803
6804   iv_ca_add_group (data, ivs, group);
6805   best_cost = iv_ca_cost (ivs);
6806   cp = iv_ca_cand_for_group (ivs, group);
6807   if (cp)
6808     {
6809       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6810       iv_ca_set_no_cp (data, ivs, group);
6811     }
6812
6813   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6814      first try important candidates not based on any memory object.  Only if
6815      this fails, try the specific ones.  Rationale -- in loops with many
6816      variables the best choice often is to use just one generic biv.  If we
6817      added here many ivs specific to the uses, the optimization algorithm later
6818      would be likely to get stuck in a local minimum, thus causing us to create
6819      too many ivs.  The approach from few ivs to more seems more likely to be
6820      successful -- starting from few ivs, replacing an expensive use by a
6821      specific iv should always be a win.  */
6822   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6823     {
6824       cand = data->vcands[i];
6825
6826       if (originalp && cand->pos !=IP_ORIGINAL)
6827         continue;
6828
6829       if (!originalp && cand->iv->base_object != NULL_TREE)
6830         continue;
6831
6832       if (iv_ca_cand_used_p (ivs, cand))
6833         continue;
6834
6835       cp = get_group_iv_cost (data, group, cand);
6836       if (!cp)
6837         continue;
6838
6839       iv_ca_set_cp (data, ivs, group, cp);
6840       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6841                                true);
6842       iv_ca_set_no_cp (data, ivs, group);
6843       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6844
6845       if (act_cost < best_cost)
6846         {
6847           best_cost = act_cost;
6848
6849           iv_ca_delta_free (&best_delta);
6850           best_delta = act_delta;
6851         }
6852       else
6853         iv_ca_delta_free (&act_delta);
6854     }
6855
6856   if (best_cost.infinite_cost_p ())
6857     {
6858       for (i = 0; i < group->n_map_members; i++)
6859         {
6860           cp = group->cost_map + i;
6861           cand = cp->cand;
6862           if (!cand)
6863             continue;
6864
6865           /* Already tried this.  */
6866           if (cand->important)
6867             {
6868               if (originalp && cand->pos == IP_ORIGINAL)
6869                 continue;
6870               if (!originalp && cand->iv->base_object == NULL_TREE)
6871                 continue;
6872             }
6873
6874           if (iv_ca_cand_used_p (ivs, cand))
6875             continue;
6876
6877           act_delta = NULL;
6878           iv_ca_set_cp (data, ivs, group, cp);
6879           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6880           iv_ca_set_no_cp (data, ivs, group);
6881           act_delta = iv_ca_delta_add (group,
6882                                        iv_ca_cand_for_group (ivs, group),
6883                                        cp, act_delta);
6884
6885           if (act_cost < best_cost)
6886             {
6887               best_cost = act_cost;
6888
6889               if (best_delta)
6890                 iv_ca_delta_free (&best_delta);
6891               best_delta = act_delta;
6892             }
6893           else
6894             iv_ca_delta_free (&act_delta);
6895         }
6896     }
6897
6898   iv_ca_delta_commit (data, ivs, best_delta, true);
6899   iv_ca_delta_free (&best_delta);
6900
6901   return !best_cost.infinite_cost_p ();
6902 }
6903
6904 /* Finds an initial assignment of candidates to uses.  */
6905
6906 static struct iv_ca *
6907 get_initial_solution (struct ivopts_data *data, bool originalp)
6908 {
6909   unsigned i;
6910   struct iv_ca *ivs = iv_ca_new (data);
6911
6912   for (i = 0; i < data->vgroups.length (); i++)
6913     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6914       {
6915         iv_ca_free (&ivs);
6916         return NULL;
6917       }
6918
6919   return ivs;
6920 }
6921
6922 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6923    points to a bool variable, this function tries to break local
6924    optimal fixed-point by replacing candidates in IVS if it's true.  */
6925
6926 static bool
6927 try_improve_iv_set (struct ivopts_data *data,
6928                     struct iv_ca *ivs, bool *try_replace_p)
6929 {
6930   unsigned i, n_ivs;
6931   comp_cost acost, best_cost = iv_ca_cost (ivs);
6932   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6933   struct iv_cand *cand;
6934
6935   /* Try extending the set of induction variables by one.  */
6936   for (i = 0; i < data->vcands.length (); i++)
6937     {
6938       cand = data->vcands[i];
6939
6940       if (iv_ca_cand_used_p (ivs, cand))
6941         continue;
6942
6943       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6944       if (!act_delta)
6945         continue;
6946
6947       /* If we successfully added the candidate and the set is small enough,
6948          try optimizing it by removing other candidates.  */
6949       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6950         {
6951           iv_ca_delta_commit (data, ivs, act_delta, true);
6952           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6953           iv_ca_delta_commit (data, ivs, act_delta, false);
6954           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6955         }
6956
6957       if (acost < best_cost)
6958         {
6959           best_cost = acost;
6960           iv_ca_delta_free (&best_delta);
6961           best_delta = act_delta;
6962         }
6963       else
6964         iv_ca_delta_free (&act_delta);
6965     }
6966
6967   if (!best_delta)
6968     {
6969       /* Try removing the candidates from the set instead.  */
6970       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6971
6972       if (!best_delta && *try_replace_p)
6973         {
6974           *try_replace_p = false;
6975           /* So far candidate selecting algorithm tends to choose fewer IVs
6976              so that it can handle cases in which loops have many variables
6977              but the best choice is often to use only one general biv.  One
6978              weakness is it can't handle opposite cases, in which different
6979              candidates should be chosen with respect to each use.  To solve
6980              the problem, we replace candidates in a manner described by the
6981              comments of iv_ca_replace, thus give general algorithm a chance
6982              to break local optimal fixed-point in these cases.  */
6983           best_cost = iv_ca_replace (data, ivs, &best_delta);
6984         }
6985
6986       if (!best_delta)
6987         return false;
6988     }
6989
6990   iv_ca_delta_commit (data, ivs, best_delta, true);
6991   gcc_assert (best_cost == iv_ca_cost (ivs));
6992   iv_ca_delta_free (&best_delta);
6993   return true;
6994 }
6995
6996 /* Attempts to find the optimal set of induction variables.  We do simple
6997    greedy heuristic -- we try to replace at most one candidate in the selected
6998    solution and remove the unused ivs while this improves the cost.  */
6999
7000 static struct iv_ca *
7001 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7002 {
7003   struct iv_ca *set;
7004   bool try_replace_p = true;
7005
7006   /* Get the initial solution.  */
7007   set = get_initial_solution (data, originalp);
7008   if (!set)
7009     {
7010       if (dump_file && (dump_flags & TDF_DETAILS))
7011         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7012       return NULL;
7013     }
7014
7015   if (dump_file && (dump_flags & TDF_DETAILS))
7016     {
7017       fprintf (dump_file, "Initial set of candidates:\n");
7018       iv_ca_dump (data, dump_file, set);
7019     }
7020
7021   while (try_improve_iv_set (data, set, &try_replace_p))
7022     {
7023       if (dump_file && (dump_flags & TDF_DETAILS))
7024         {
7025           fprintf (dump_file, "Improved to:\n");
7026           iv_ca_dump (data, dump_file, set);
7027         }
7028     }
7029
7030   return set;
7031 }
7032
7033 static struct iv_ca *
7034 find_optimal_iv_set (struct ivopts_data *data)
7035 {
7036   unsigned i;
7037   comp_cost cost, origcost;
7038   struct iv_ca *set, *origset;
7039
7040   /* Determine the cost based on a strategy that starts with original IVs,
7041      and try again using a strategy that prefers candidates not based
7042      on any IVs.  */
7043   origset = find_optimal_iv_set_1 (data, true);
7044   set = find_optimal_iv_set_1 (data, false);
7045
7046   if (!origset && !set)
7047     return NULL;
7048
7049   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7050   cost = set ? iv_ca_cost (set) : infinite_cost;
7051
7052   if (dump_file && (dump_flags & TDF_DETAILS))
7053     {
7054       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
7055                origcost.cost, origcost.complexity);
7056       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
7057                cost.cost, cost.complexity);
7058     }
7059
7060   /* Choose the one with the best cost.  */
7061   if (origcost <= cost)
7062     {
7063       if (set)
7064         iv_ca_free (&set);
7065       set = origset;
7066     }
7067   else if (origset)
7068     iv_ca_free (&origset);
7069
7070   for (i = 0; i < data->vgroups.length (); i++)
7071     {
7072       struct iv_group *group = data->vgroups[i];
7073       group->selected = iv_ca_cand_for_group (set, group)->cand;
7074     }
7075
7076   return set;
7077 }
7078
7079 /* Creates a new induction variable corresponding to CAND.  */
7080
7081 static void
7082 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7083 {
7084   gimple_stmt_iterator incr_pos;
7085   tree base;
7086   struct iv_use *use;
7087   struct iv_group *group;
7088   bool after = false;
7089
7090   if (!cand->iv)
7091     return;
7092
7093   switch (cand->pos)
7094     {
7095     case IP_NORMAL:
7096       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7097       break;
7098
7099     case IP_END:
7100       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7101       after = true;
7102       break;
7103
7104     case IP_AFTER_USE:
7105       after = true;
7106       /* fall through */
7107     case IP_BEFORE_USE:
7108       incr_pos = gsi_for_stmt (cand->incremented_at);
7109       break;
7110
7111     case IP_ORIGINAL:
7112       /* Mark that the iv is preserved.  */
7113       name_info (data, cand->var_before)->preserve_biv = true;
7114       name_info (data, cand->var_after)->preserve_biv = true;
7115
7116       /* Rewrite the increment so that it uses var_before directly.  */
7117       use = find_interesting_uses_op (data, cand->var_after);
7118       group = data->vgroups[use->group_id];
7119       group->selected = cand;
7120       return;
7121     }
7122
7123   gimple_add_tmp_var (cand->var_before);
7124
7125   base = unshare_expr (cand->iv->base);
7126
7127   create_iv (base, unshare_expr (cand->iv->step),
7128              cand->var_before, data->current_loop,
7129              &incr_pos, after, &cand->var_before, &cand->var_after);
7130 }
7131
7132 /* Creates new induction variables described in SET.  */
7133
7134 static void
7135 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7136 {
7137   unsigned i;
7138   struct iv_cand *cand;
7139   bitmap_iterator bi;
7140
7141   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7142     {
7143       cand = data->vcands[i];
7144       create_new_iv (data, cand);
7145     }
7146
7147   if (dump_file && (dump_flags & TDF_DETAILS))
7148     {
7149       fprintf (dump_file, "Selected IV set for loop %d",
7150                data->current_loop->num);
7151       if (data->loop_loc != UNKNOWN_LOCATION)
7152         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7153                  LOCATION_LINE (data->loop_loc));
7154       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7155                avg_loop_niter (data->current_loop));
7156       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " expressions",
7157                (unsigned HOST_WIDE_INT) set->used_inv_exprs->elements ());
7158       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7159       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7160         {
7161           cand = data->vcands[i];
7162           dump_cand (dump_file, cand);
7163         }
7164       fprintf (dump_file, "\n");
7165     }
7166 }
7167
7168 /* Rewrites USE (definition of iv used in a nonlinear expression)
7169    using candidate CAND.  */
7170
7171 static void
7172 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7173                             struct iv_use *use, struct iv_cand *cand)
7174 {
7175   tree comp;
7176   tree op, tgt;
7177   gassign *ass;
7178   gimple_stmt_iterator bsi;
7179
7180   /* An important special case -- if we are asked to express value of
7181      the original iv by itself, just exit; there is no need to
7182      introduce a new computation (that might also need casting the
7183      variable to unsigned and back).  */
7184   if (cand->pos == IP_ORIGINAL
7185       && cand->incremented_at == use->stmt)
7186     {
7187       enum tree_code stmt_code;
7188
7189       gcc_assert (is_gimple_assign (use->stmt));
7190       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7191
7192       /* Check whether we may leave the computation unchanged.
7193          This is the case only if it does not rely on other
7194          computations in the loop -- otherwise, the computation
7195          we rely upon may be removed in remove_unused_ivs,
7196          thus leading to ICE.  */
7197       stmt_code = gimple_assign_rhs_code (use->stmt);
7198       if (stmt_code == PLUS_EXPR
7199           || stmt_code == MINUS_EXPR
7200           || stmt_code == POINTER_PLUS_EXPR)
7201         {
7202           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7203             op = gimple_assign_rhs2 (use->stmt);
7204           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7205             op = gimple_assign_rhs1 (use->stmt);
7206           else
7207             op = NULL_TREE;
7208         }
7209       else
7210         op = NULL_TREE;
7211
7212       if (op && expr_invariant_in_loop_p (data->current_loop, op))
7213         return;
7214     }
7215
7216   comp = get_computation (data->current_loop, use, cand);
7217   gcc_assert (comp != NULL_TREE);
7218
7219   switch (gimple_code (use->stmt))
7220     {
7221     case GIMPLE_PHI:
7222       tgt = PHI_RESULT (use->stmt);
7223
7224       /* If we should keep the biv, do not replace it.  */
7225       if (name_info (data, tgt)->preserve_biv)
7226         return;
7227
7228       bsi = gsi_after_labels (gimple_bb (use->stmt));
7229       break;
7230
7231     case GIMPLE_ASSIGN:
7232       tgt = gimple_assign_lhs (use->stmt);
7233       bsi = gsi_for_stmt (use->stmt);
7234       break;
7235
7236     default:
7237       gcc_unreachable ();
7238     }
7239
7240   if (!valid_gimple_rhs_p (comp)
7241       || (gimple_code (use->stmt) != GIMPLE_PHI
7242           /* We can't allow re-allocating the stmt as it might be pointed
7243              to still.  */
7244           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7245               >= gimple_num_ops (gsi_stmt (bsi)))))
7246     {
7247       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7248                                        true, GSI_SAME_STMT);
7249       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7250         {
7251           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7252           /* As this isn't a plain copy we have to reset alignment
7253              information.  */
7254           if (SSA_NAME_PTR_INFO (comp))
7255             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7256         }
7257     }
7258
7259   if (gimple_code (use->stmt) == GIMPLE_PHI)
7260     {
7261       ass = gimple_build_assign (tgt, comp);
7262       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7263
7264       bsi = gsi_for_stmt (use->stmt);
7265       remove_phi_node (&bsi, false);
7266     }
7267   else
7268     {
7269       gimple_assign_set_rhs_from_tree (&bsi, comp);
7270       use->stmt = gsi_stmt (bsi);
7271     }
7272 }
7273
7274 /* Performs a peephole optimization to reorder the iv update statement with
7275    a mem ref to enable instruction combining in later phases. The mem ref uses
7276    the iv value before the update, so the reordering transformation requires
7277    adjustment of the offset. CAND is the selected IV_CAND.
7278
7279    Example:
7280
7281    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7282    iv2 = iv1 + 1;
7283
7284    if (t < val)      (1)
7285      goto L;
7286    goto Head;
7287
7288
7289    directly propagating t over to (1) will introduce overlapping live range
7290    thus increase register pressure. This peephole transform it into:
7291
7292
7293    iv2 = iv1 + 1;
7294    t = MEM_REF (base, iv2, 8, 8);
7295    if (t < val)
7296      goto L;
7297    goto Head;
7298 */
7299
7300 static void
7301 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7302 {
7303   tree var_after;
7304   gimple *iv_update, *stmt;
7305   basic_block bb;
7306   gimple_stmt_iterator gsi, gsi_iv;
7307
7308   if (cand->pos != IP_NORMAL)
7309     return;
7310
7311   var_after = cand->var_after;
7312   iv_update = SSA_NAME_DEF_STMT (var_after);
7313
7314   bb = gimple_bb (iv_update);
7315   gsi = gsi_last_nondebug_bb (bb);
7316   stmt = gsi_stmt (gsi);
7317
7318   /* Only handle conditional statement for now.  */
7319   if (gimple_code (stmt) != GIMPLE_COND)
7320     return;
7321
7322   gsi_prev_nondebug (&gsi);
7323   stmt = gsi_stmt (gsi);
7324   if (stmt != iv_update)
7325     return;
7326
7327   gsi_prev_nondebug (&gsi);
7328   if (gsi_end_p (gsi))
7329     return;
7330
7331   stmt = gsi_stmt (gsi);
7332   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7333     return;
7334
7335   if (stmt != use->stmt)
7336     return;
7337
7338   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7339     return;
7340
7341   if (dump_file && (dump_flags & TDF_DETAILS))
7342     {
7343       fprintf (dump_file, "Reordering \n");
7344       print_gimple_stmt (dump_file, iv_update, 0, 0);
7345       print_gimple_stmt (dump_file, use->stmt, 0, 0);
7346       fprintf (dump_file, "\n");
7347     }
7348
7349   gsi = gsi_for_stmt (use->stmt);
7350   gsi_iv = gsi_for_stmt (iv_update);
7351   gsi_move_before (&gsi_iv, &gsi);
7352
7353   cand->pos = IP_BEFORE_USE;
7354   cand->incremented_at = use->stmt;
7355 }
7356
7357 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7358
7359 static void
7360 rewrite_use_address (struct ivopts_data *data,
7361                      struct iv_use *use, struct iv_cand *cand)
7362 {
7363   aff_tree aff;
7364   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7365   tree base_hint = NULL_TREE;
7366   tree ref, iv;
7367   bool ok;
7368
7369   adjust_iv_update_pos (cand, use);
7370   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7371   gcc_assert (ok);
7372   unshare_aff_combination (&aff);
7373
7374   /* To avoid undefined overflow problems, all IV candidates use unsigned
7375      integer types.  The drawback is that this makes it impossible for
7376      create_mem_ref to distinguish an IV that is based on a memory object
7377      from one that represents simply an offset.
7378
7379      To work around this problem, we pass a hint to create_mem_ref that
7380      indicates which variable (if any) in aff is an IV based on a memory
7381      object.  Note that we only consider the candidate.  If this is not
7382      based on an object, the base of the reference is in some subexpression
7383      of the use -- but these will use pointer types, so they are recognized
7384      by the create_mem_ref heuristics anyway.  */
7385   if (cand->iv->base_object)
7386     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7387
7388   iv = var_at_stmt (data->current_loop, cand, use->stmt);
7389   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7390                         reference_alias_ptr_type (*use->op_p),
7391                         iv, base_hint, data->speed);
7392   copy_ref_info (ref, *use->op_p);
7393   *use->op_p = ref;
7394 }
7395
7396 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7397    candidate CAND.  */
7398
7399 static void
7400 rewrite_use_compare (struct ivopts_data *data,
7401                      struct iv_use *use, struct iv_cand *cand)
7402 {
7403   tree comp, *var_p, op, bound;
7404   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7405   enum tree_code compare;
7406   struct iv_group *group = data->vgroups[use->group_id];
7407   struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7408   bool ok;
7409
7410   bound = cp->value;
7411   if (bound)
7412     {
7413       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7414       tree var_type = TREE_TYPE (var);
7415       gimple_seq stmts;
7416
7417       if (dump_file && (dump_flags & TDF_DETAILS))
7418         {
7419           fprintf (dump_file, "Replacing exit test: ");
7420           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7421         }
7422       compare = cp->comp;
7423       bound = unshare_expr (fold_convert (var_type, bound));
7424       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7425       if (stmts)
7426         gsi_insert_seq_on_edge_immediate (
7427                 loop_preheader_edge (data->current_loop),
7428                 stmts);
7429
7430       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7431       gimple_cond_set_lhs (cond_stmt, var);
7432       gimple_cond_set_code (cond_stmt, compare);
7433       gimple_cond_set_rhs (cond_stmt, op);
7434       return;
7435     }
7436
7437   /* The induction variable elimination failed; just express the original
7438      giv.  */
7439   comp = get_computation (data->current_loop, use, cand);
7440   gcc_assert (comp != NULL_TREE);
7441
7442   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7443   gcc_assert (ok);
7444
7445   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7446                                      true, GSI_SAME_STMT);
7447 }
7448
7449 /* Rewrite the groups using the selected induction variables.  */
7450
7451 static void
7452 rewrite_groups (struct ivopts_data *data)
7453 {
7454   unsigned i, j;
7455
7456   for (i = 0; i < data->vgroups.length (); i++)
7457     {
7458       struct iv_group *group = data->vgroups[i];
7459       struct iv_cand *cand = group->selected;
7460
7461       gcc_assert (cand);
7462
7463       if (group->type == USE_NONLINEAR_EXPR)
7464         {
7465           for (j = 0; j < group->vuses.length (); j++)
7466             {
7467               rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7468               update_stmt (group->vuses[j]->stmt);
7469             }
7470         }
7471       else if (group->type == USE_ADDRESS)
7472         {
7473           for (j = 0; j < group->vuses.length (); j++)
7474             {
7475               rewrite_use_address (data, group->vuses[j], cand);
7476               update_stmt (group->vuses[j]->stmt);
7477             }
7478         }
7479       else
7480         {
7481           gcc_assert (group->type == USE_COMPARE);
7482
7483           for (j = 0; j < group->vuses.length (); j++)
7484             {
7485               rewrite_use_compare (data, group->vuses[j], cand);
7486               update_stmt (group->vuses[j]->stmt);
7487             }
7488         }
7489     }
7490 }
7491
7492 /* Removes the ivs that are not used after rewriting.  */
7493
7494 static void
7495 remove_unused_ivs (struct ivopts_data *data)
7496 {
7497   unsigned j;
7498   bitmap_iterator bi;
7499   bitmap toremove = BITMAP_ALLOC (NULL);
7500
7501   /* Figure out an order in which to release SSA DEFs so that we don't
7502      release something that we'd have to propagate into a debug stmt
7503      afterwards.  */
7504   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7505     {
7506       struct version_info *info;
7507
7508       info = ver_info (data, j);
7509       if (info->iv
7510           && !integer_zerop (info->iv->step)
7511           && !info->inv_id
7512           && !info->iv->nonlin_use
7513           && !info->preserve_biv)
7514         {
7515           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7516
7517           tree def = info->iv->ssa_name;
7518
7519           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7520             {
7521               imm_use_iterator imm_iter;
7522               use_operand_p use_p;
7523               gimple *stmt;
7524               int count = 0;
7525
7526               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7527                 {
7528                   if (!gimple_debug_bind_p (stmt))
7529                     continue;
7530
7531                   /* We just want to determine whether to do nothing
7532                      (count == 0), to substitute the computed
7533                      expression into a single use of the SSA DEF by
7534                      itself (count == 1), or to use a debug temp
7535                      because the SSA DEF is used multiple times or as
7536                      part of a larger expression (count > 1). */
7537                   count++;
7538                   if (gimple_debug_bind_get_value (stmt) != def)
7539                     count++;
7540
7541                   if (count > 1)
7542                     BREAK_FROM_IMM_USE_STMT (imm_iter);
7543                 }
7544
7545               if (!count)
7546                 continue;
7547
7548               struct iv_use dummy_use;
7549               struct iv_cand *best_cand = NULL, *cand;
7550               unsigned i, best_pref = 0, cand_pref;
7551
7552               memset (&dummy_use, 0, sizeof (dummy_use));
7553               dummy_use.iv = info->iv;
7554               for (i = 0; i < data->vgroups.length () && i < 64; i++)
7555                 {
7556                   cand = data->vgroups[i]->selected;
7557                   if (cand == best_cand)
7558                     continue;
7559                   cand_pref = operand_equal_p (cand->iv->step,
7560                                                info->iv->step, 0)
7561                     ? 4 : 0;
7562                   cand_pref
7563                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7564                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7565                     ? 2 : 0;
7566                   cand_pref
7567                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7568                     ? 1 : 0;
7569                   if (best_cand == NULL || best_pref < cand_pref)
7570                     {
7571                       best_cand = cand;
7572                       best_pref = cand_pref;
7573                     }
7574                 }
7575
7576               if (!best_cand)
7577                 continue;
7578
7579               tree comp = get_computation_at (data->current_loop,
7580                                               &dummy_use, best_cand,
7581                                               SSA_NAME_DEF_STMT (def));
7582               if (!comp)
7583                 continue;
7584
7585               if (count > 1)
7586                 {
7587                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7588                   DECL_ARTIFICIAL (vexpr) = 1;
7589                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7590                   if (SSA_NAME_VAR (def))
7591                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7592                   else
7593                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7594                   gdebug *def_temp
7595                     = gimple_build_debug_bind (vexpr, comp, NULL);
7596                   gimple_stmt_iterator gsi;
7597
7598                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7599                     gsi = gsi_after_labels (gimple_bb
7600                                             (SSA_NAME_DEF_STMT (def)));
7601                   else
7602                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7603
7604                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7605                   comp = vexpr;
7606                 }
7607
7608               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7609                 {
7610                   if (!gimple_debug_bind_p (stmt))
7611                     continue;
7612
7613                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7614                     SET_USE (use_p, comp);
7615
7616                   update_stmt (stmt);
7617                 }
7618             }
7619         }
7620     }
7621
7622   release_defs_bitset (toremove);
7623
7624   BITMAP_FREE (toremove);
7625 }
7626
7627 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7628    for hash_map::traverse.  */
7629
7630 bool
7631 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7632 {
7633   free (value);
7634   return true;
7635 }
7636
7637 /* Frees data allocated by the optimization of a single loop.  */
7638
7639 static void
7640 free_loop_data (struct ivopts_data *data)
7641 {
7642   unsigned i, j;
7643   bitmap_iterator bi;
7644   tree obj;
7645
7646   if (data->niters)
7647     {
7648       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7649       delete data->niters;
7650       data->niters = NULL;
7651     }
7652
7653   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7654     {
7655       struct version_info *info;
7656
7657       info = ver_info (data, i);
7658       info->iv = NULL;
7659       info->has_nonlin_use = false;
7660       info->preserve_biv = false;
7661       info->inv_id = 0;
7662     }
7663   bitmap_clear (data->relevant);
7664   bitmap_clear (data->important_candidates);
7665
7666   for (i = 0; i < data->vgroups.length (); i++)
7667     {
7668       struct iv_group *group = data->vgroups[i];
7669
7670       for (j = 0; j < group->vuses.length (); j++)
7671         free (group->vuses[j]);
7672       group->vuses.release ();
7673
7674       BITMAP_FREE (group->related_cands);
7675       for (j = 0; j < group->n_map_members; j++)
7676         if (group->cost_map[j].depends_on)
7677           BITMAP_FREE (group->cost_map[j].depends_on);
7678
7679       free (group->cost_map);
7680       free (group);
7681     }
7682   data->vgroups.truncate (0);
7683
7684   for (i = 0; i < data->vcands.length (); i++)
7685     {
7686       struct iv_cand *cand = data->vcands[i];
7687
7688       if (cand->depends_on)
7689         BITMAP_FREE (cand->depends_on);
7690       free (cand);
7691     }
7692   data->vcands.truncate (0);
7693
7694   if (data->version_info_size < num_ssa_names)
7695     {
7696       data->version_info_size = 2 * num_ssa_names;
7697       free (data->version_info);
7698       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7699     }
7700
7701   data->max_inv_id = 0;
7702
7703   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7704     SET_DECL_RTL (obj, NULL_RTX);
7705
7706   decl_rtl_to_reset.truncate (0);
7707
7708   data->inv_expr_tab->empty ();
7709   data->max_inv_expr_id = 0;
7710
7711   data->iv_common_cand_tab->empty ();
7712   data->iv_common_cands.truncate (0);
7713 }
7714
7715 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7716    loop tree.  */
7717
7718 static void
7719 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7720 {
7721   free_loop_data (data);
7722   free (data->version_info);
7723   BITMAP_FREE (data->relevant);
7724   BITMAP_FREE (data->important_candidates);
7725
7726   decl_rtl_to_reset.release ();
7727   data->vgroups.release ();
7728   data->vcands.release ();
7729   delete data->inv_expr_tab;
7730   data->inv_expr_tab = NULL;
7731   free_affine_expand_cache (&data->name_expansion_cache);
7732   delete data->iv_common_cand_tab;
7733   data->iv_common_cand_tab = NULL;
7734   data->iv_common_cands.release ();
7735   obstack_free (&data->iv_obstack, NULL);
7736 }
7737
7738 /* Returns true if the loop body BODY includes any function calls.  */
7739
7740 static bool
7741 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7742 {
7743   gimple_stmt_iterator gsi;
7744   unsigned i;
7745
7746   for (i = 0; i < num_nodes; i++)
7747     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7748       {
7749         gimple *stmt = gsi_stmt (gsi);
7750         if (is_gimple_call (stmt)
7751             && !gimple_call_internal_p (stmt)
7752             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7753           return true;
7754       }
7755   return false;
7756 }
7757
7758 /* Optimizes the LOOP.  Returns true if anything changed.  */
7759
7760 static bool
7761 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7762 {
7763   bool changed = false;
7764   struct iv_ca *iv_ca;
7765   edge exit = single_dom_exit (loop);
7766   basic_block *body;
7767
7768   gcc_assert (!data->niters);
7769   data->current_loop = loop;
7770   data->loop_loc = find_loop_location (loop);
7771   data->speed = optimize_loop_for_speed_p (loop);
7772
7773   if (dump_file && (dump_flags & TDF_DETAILS))
7774     {
7775       fprintf (dump_file, "Processing loop %d", loop->num);
7776       if (data->loop_loc != UNKNOWN_LOCATION)
7777         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7778                  LOCATION_LINE (data->loop_loc));
7779       fprintf (dump_file, "\n");
7780
7781       if (exit)
7782         {
7783           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7784                    exit->src->index, exit->dest->index);
7785           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7786           fprintf (dump_file, "\n");
7787         }
7788
7789       fprintf (dump_file, "\n");
7790     }
7791
7792   body = get_loop_body (loop);
7793   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7794   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7795   free (body);
7796
7797   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7798
7799   /* For each ssa name determines whether it behaves as an induction variable
7800      in some loop.  */
7801   if (!find_induction_variables (data))
7802     goto finish;
7803
7804   /* Finds interesting uses (item 1).  */
7805   find_interesting_uses (data);
7806   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7807     goto finish;
7808
7809   /* Finds candidates for the induction variables (item 2).  */
7810   find_iv_candidates (data);
7811
7812   /* Calculates the costs (item 3, part 1).  */
7813   determine_iv_costs (data);
7814   determine_group_iv_costs (data);
7815   determine_set_costs (data);
7816
7817   /* Find the optimal set of induction variables (item 3, part 2).  */
7818   iv_ca = find_optimal_iv_set (data);
7819   if (!iv_ca)
7820     goto finish;
7821   changed = true;
7822
7823   /* Create the new induction variables (item 4, part 1).  */
7824   create_new_ivs (data, iv_ca);
7825   iv_ca_free (&iv_ca);
7826
7827   /* Rewrite the uses (item 4, part 2).  */
7828   rewrite_groups (data);
7829
7830   /* Remove the ivs that are unused after rewriting.  */
7831   remove_unused_ivs (data);
7832
7833   /* We have changed the structure of induction variables; it might happen
7834      that definitions in the scev database refer to some of them that were
7835      eliminated.  */
7836   scev_reset ();
7837
7838 finish:
7839   free_loop_data (data);
7840
7841   return changed;
7842 }
7843
7844 /* Main entry point.  Optimizes induction variables in loops.  */
7845
7846 void
7847 tree_ssa_iv_optimize (void)
7848 {
7849   struct loop *loop;
7850   struct ivopts_data data;
7851
7852   tree_ssa_iv_optimize_init (&data);
7853
7854   /* Optimize the loops starting with the innermost ones.  */
7855   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7856     {
7857       if (dump_file && (dump_flags & TDF_DETAILS))
7858         flow_loop_dump (loop, dump_file, NULL, 1);
7859
7860       tree_ssa_iv_optimize_loop (&data, loop);
7861     }
7862
7863   tree_ssa_iv_optimize_finalize (&data);
7864 }