gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2017 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33       Note the interesting uses are categorized and handled in group.
  34       Generally, address type uses are grouped together if their iv bases
  35       are different in constant offset.
  36
  37    2) Candidates for the induction variables are found.  This includes
  38
  39       -- old induction variables
  40       -- the variables defined by expressions derived from the "interesting
  41          groups/uses" above
  42
  43    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  44       cost function assigns a cost to sets of induction variables and consists
  45       of three parts:
  46
  47       -- The group/use costs.  Each of the interesting groups/uses chooses
  48          the best induction variable in the set and adds its cost to the sum.
  49          The cost reflects the time spent on modifying the induction variables
  50          value to be usable for the given purpose (adding base and offset for
  51          arrays, etc.).
  52       -- The variable costs.  Each of the variables has a cost assigned that
  53          reflects the costs associated with incrementing the value of the
  54          variable.  The original variables are somewhat preferred.
  55       -- The set cost.  Depending on the size of the set, extra cost may be
  56          added to reflect register pressure.
  57
  58       All the costs are defined in a machine-specific way, using the target
  59       hooks and machine descriptions to determine them.
  60
  61    4) The trees are transformed to use the new variables, the dead code is
  62       removed.
  63
  64    All of this is done loop by loop.  Doing it globally is theoretically
  65    possible, it might give a better performance and it might enable us
  66    to decide costs more precisely, but getting all the interactions right
  67    would be complicated.  */
  68
  69 #include "config.h"
  70 #include "system.h"
  71 #include "coretypes.h"
  72 #include "backend.h"
  73 #include "rtl.h"
  74 #include "tree.h"
  75 #include "gimple.h"
  76 #include "cfghooks.h"
  77 #include "tree-pass.h"
  78 #include "memmodel.h"
  79 #include "tm_p.h"
  80 #include "ssa.h"
  81 #include "expmed.h"
  82 #include "insn-config.h"
  83 #include "emit-rtl.h"
  84 #include "recog.h"
  85 #include "cgraph.h"
  86 #include "gimple-pretty-print.h"
  87 #include "alias.h"
  88 #include "fold-const.h"
  89 #include "stor-layout.h"
  90 #include "tree-eh.h"
  91 #include "gimplify.h"
  92 #include "gimple-iterator.h"
  93 #include "gimplify-me.h"
  94 #include "tree-cfg.h"
  95 #include "tree-ssa-loop-ivopts.h"
  96 #include "tree-ssa-loop-manip.h"
  97 #include "tree-ssa-loop-niter.h"
  98 #include "tree-ssa-loop.h"
  99 #include "explow.h"
 100 #include "expr.h"
 101 #include "tree-dfa.h"
 102 #include "tree-ssa.h"
 103 #include "cfgloop.h"
 104 #include "tree-scalar-evolution.h"
 105 #include "params.h"
 106 #include "tree-affine.h"
 107 #include "tree-ssa-propagate.h"
 108 #include "tree-ssa-address.h"
 109 #include "builtins.h"
 110 #include "tree-vectorizer.h"
 111
 112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 113    cost of different addressing modes.  This should be moved to a TBD
 114    interface between the GIMPLE and RTL worlds.  */
 115
 116 /* The infinite cost.  */
 117 #define INFTY 10000000
 118
 119 /* Returns the expected number of loop iterations for LOOP.
 120    The average trip count is computed from profile data if it
 121    exists. */
 122
 123 static inline HOST_WIDE_INT
 124 avg_loop_niter (struct loop *loop)
 125 {
 126   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 127   if (niter == -1)
 128     {
 129       niter = likely_max_stmt_executions_int (loop);
 130
 131       if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
 132         return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
 133     }
 134
 135   return niter;
 136 }
 137
 138 struct iv_use;
 139
 140 /* Representation of the induction variable.  */
 141 struct iv
 142 {
 143   tree base;            /* Initial value of the iv.  */
 144   tree base_object;     /* A memory object to that the induction variable points.  */
 145   tree step;            /* Step of the iv (constant only).  */
 146   tree ssa_name;        /* The ssa name with the value.  */
 147   struct iv_use *nonlin_use;    /* The identifier in the use if it is the case.  */
 148   bool biv_p;           /* Is it a biv?  */
 149   bool no_overflow;     /* True if the iv doesn't overflow.  */
 150   bool have_address_use;/* For biv, indicate if it's used in any address
 151                            type use.  */
 152 };
 153
 154 /* Per-ssa version information (induction variable descriptions, etc.).  */
 155 struct version_info
 156 {
 157   tree name;            /* The ssa name.  */
 158   struct iv *iv;        /* Induction variable description.  */
 159   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 160                            an expression that is not an induction variable.  */
 161   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 162   unsigned inv_id;      /* Id of an invariant.  */
 163 };
 164
 165 /* Types of uses.  */
 166 enum use_type
 167 {
 168   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 169   USE_ADDRESS,          /* Use in an address.  */
 170   USE_COMPARE           /* Use is a compare.  */
 171 };
 172
 173 /* Cost of a computation.  */
 174 struct comp_cost
 175 {
 176   comp_cost (): cost (0), complexity (0), scratch (0)
 177   {}
 178
 179   comp_cost (int cost, unsigned complexity, int scratch = 0)
 180     : cost (cost), complexity (complexity), scratch (scratch)
 181   {}
 182
 183   /* Returns true if COST is infinite.  */
 184   bool infinite_cost_p ();
 185
 186   /* Adds costs COST1 and COST2.  */
 187   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
 188
 189   /* Adds COST to the comp_cost.  */
 190   comp_cost operator+= (comp_cost cost);
 191
 192   /* Adds constant C to this comp_cost.  */
 193   comp_cost operator+= (HOST_WIDE_INT c);
 194
 195   /* Subtracts constant C to this comp_cost.  */
 196   comp_cost operator-= (HOST_WIDE_INT c);
 197
 198   /* Divide the comp_cost by constant C.  */
 199   comp_cost operator/= (HOST_WIDE_INT c);
 200
 201   /* Multiply the comp_cost by constant C.  */
 202   comp_cost operator*= (HOST_WIDE_INT c);
 203
 204   /* Subtracts costs COST1 and COST2.  */
 205   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
 206
 207   /* Subtracts COST from this comp_cost.  */
 208   comp_cost operator-= (comp_cost cost);
 209
 210   /* Returns true if COST1 is smaller than COST2.  */
 211   friend bool operator< (comp_cost cost1, comp_cost cost2);
 212
 213   /* Returns true if COST1 and COST2 are equal.  */
 214   friend bool operator== (comp_cost cost1, comp_cost cost2);
 215
 216   /* Returns true if COST1 is smaller or equal than COST2.  */
 217   friend bool operator<= (comp_cost cost1, comp_cost cost2);
 218
 219   int cost;             /* The runtime cost.  */
 220   unsigned complexity;  /* The estimate of the complexity of the code for
 221                            the computation (in no concrete units --
 222                            complexity field should be larger for more
 223                            complex expressions and addressing modes).  */
 224   int scratch;          /* Scratch used during cost computation.  */
 225 };
 226
 227 static const comp_cost no_cost;
 228 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
 229
 230 bool
 231 comp_cost::infinite_cost_p ()
 232 {
 233   return cost == INFTY;
 234 }
 235
 236 comp_cost
 237 operator+ (comp_cost cost1, comp_cost cost2)
 238 {
 239   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
 240     return infinite_cost;
 241
 242   cost1.cost += cost2.cost;
 243   cost1.complexity += cost2.complexity;
 244
 245   return cost1;
 246 }
 247
 248 comp_cost
 249 operator- (comp_cost cost1, comp_cost cost2)
 250 {
 251   if (cost1.infinite_cost_p ())
 252     return infinite_cost;
 253
 254   gcc_assert (!cost2.infinite_cost_p ());
 255
 256   cost1.cost -= cost2.cost;
 257   cost1.complexity -= cost2.complexity;
 258
 259   return cost1;
 260 }
 261
 262 comp_cost
 263 comp_cost::operator+= (comp_cost cost)
 264 {
 265   *this = *this + cost;
 266   return *this;
 267 }
 268
 269 comp_cost
 270 comp_cost::operator+= (HOST_WIDE_INT c)
 271 {
 272   if (infinite_cost_p ())
 273     return *this;
 274
 275   this->cost += c;
 276
 277   return *this;
 278 }
 279
 280 comp_cost
 281 comp_cost::operator-= (HOST_WIDE_INT c)
 282 {
 283   if (infinite_cost_p ())
 284     return *this;
 285
 286   this->cost -= c;
 287
 288   return *this;
 289 }
 290
 291 comp_cost
 292 comp_cost::operator/= (HOST_WIDE_INT c)
 293 {
 294   if (infinite_cost_p ())
 295     return *this;
 296
 297   this->cost /= c;
 298
 299   return *this;
 300 }
 301
 302 comp_cost
 303 comp_cost::operator*= (HOST_WIDE_INT c)
 304 {
 305   if (infinite_cost_p ())
 306     return *this;
 307
 308   this->cost *= c;
 309
 310   return *this;
 311 }
 312
 313 comp_cost
 314 comp_cost::operator-= (comp_cost cost)
 315 {
 316   *this = *this - cost;
 317   return *this;
 318 }
 319
 320 bool
 321 operator< (comp_cost cost1, comp_cost cost2)
 322 {
 323   if (cost1.cost == cost2.cost)
 324     return cost1.complexity < cost2.complexity;
 325
 326   return cost1.cost < cost2.cost;
 327 }
 328
 329 bool
 330 operator== (comp_cost cost1, comp_cost cost2)
 331 {
 332   return cost1.cost == cost2.cost
 333     && cost1.complexity == cost2.complexity;
 334 }
 335
 336 bool
 337 operator<= (comp_cost cost1, comp_cost cost2)
 338 {
 339   return cost1 < cost2 || cost1 == cost2;
 340 }
 341
 342 struct iv_inv_expr_ent;
 343
 344 /* The candidate - cost pair.  */
 345 struct cost_pair
 346 {
 347   struct iv_cand *cand; /* The candidate.  */
 348   comp_cost cost;       /* The cost.  */
 349   enum tree_code comp;  /* For iv elimination, the comparison.  */
 350   bitmap inv_vars;      /* The list of invariants that have to be
 351                            preserved.  */
 352   bitmap inv_exprs;     /* Loop invariant expressions.  */
 353   tree value;           /* For final value elimination, the expression for
 354                            the final value of the iv.  For iv elimination,
 355                            the new bound to compare with.  */
 356   iv_inv_expr_ent *inv_expr; /* Loop invariant expression.  */
 357 };
 358
 359 /* Use.  */
 360 struct iv_use
 361 {
 362   unsigned id;          /* The id of the use.  */
 363   unsigned group_id;    /* The group id the use belongs to.  */
 364   enum use_type type;   /* Type of the use.  */
 365   struct iv *iv;        /* The induction variable it is based on.  */
 366   gimple *stmt;         /* Statement in that it occurs.  */
 367   tree *op_p;           /* The place where it occurs.  */
 368
 369   tree addr_base;       /* Base address with const offset stripped.  */
 370   unsigned HOST_WIDE_INT addr_offset;
 371                         /* Const offset stripped from base address.  */
 372 };
 373
 374 /* Group of uses.  */
 375 struct iv_group
 376 {
 377   /* The id of the group.  */
 378   unsigned id;
 379   /* Uses of the group are of the same type.  */
 380   enum use_type type;
 381   /* The set of "related" IV candidates, plus the important ones.  */
 382   bitmap related_cands;
 383   /* Number of IV candidates in the cost_map.  */
 384   unsigned n_map_members;
 385   /* The costs wrto the iv candidates.  */
 386   struct cost_pair *cost_map;
 387   /* The selected candidate for the group.  */
 388   struct iv_cand *selected;
 389   /* Uses in the group.  */
 390   vec<struct iv_use *> vuses;
 391 };
 392
 393 /* The position where the iv is computed.  */
 394 enum iv_position
 395 {
 396   IP_NORMAL,            /* At the end, just before the exit condition.  */
 397   IP_END,               /* At the end of the latch block.  */
 398   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 399   IP_AFTER_USE,         /* Immediately after a specific use.  */
 400   IP_ORIGINAL           /* The original biv.  */
 401 };
 402
 403 /* The induction variable candidate.  */
 404 struct iv_cand
 405 {
 406   unsigned id;          /* The number of the candidate.  */
 407   bool important;       /* Whether this is an "important" candidate, i.e. such
 408                            that it should be considered by all uses.  */
 409   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 410   gimple *incremented_at;/* For original biv, the statement where it is
 411                            incremented.  */
 412   tree var_before;      /* The variable used for it before increment.  */
 413   tree var_after;       /* The variable used for it after increment.  */
 414   struct iv *iv;        /* The value of the candidate.  NULL for
 415                            "pseudocandidate" used to indicate the possibility
 416                            to replace the final value of an iv by direct
 417                            computation of the value.  */
 418   unsigned cost;        /* Cost of the candidate.  */
 419   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 420   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 421                               where it is incremented.  */
 422   bitmap inv_vars;      /* The list of invariants that are used in step of the
 423                            biv.  */
 424   struct iv *orig_iv;   /* The original iv if this cand is added from biv with
 425                            smaller type.  */
 426 };
 427
 428 /* Hashtable entry for common candidate derived from iv uses.  */
 429 struct iv_common_cand
 430 {
 431   tree base;
 432   tree step;
 433   /* IV uses from which this common candidate is derived.  */
 434   auto_vec<struct iv_use *> uses;
 435   hashval_t hash;
 436 };
 437
 438 /* Hashtable helpers.  */
 439
 440 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
 441 {
 442   static inline hashval_t hash (const iv_common_cand *);
 443   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
 444 };
 445
 446 /* Hash function for possible common candidates.  */
 447
 448 inline hashval_t
 449 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
 450 {
 451   return ccand->hash;
 452 }
 453
 454 /* Hash table equality function for common candidates.  */
 455
 456 inline bool
 457 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
 458                               const iv_common_cand *ccand2)
 459 {
 460   return (ccand1->hash == ccand2->hash
 461           && operand_equal_p (ccand1->base, ccand2->base, 0)
 462           && operand_equal_p (ccand1->step, ccand2->step, 0)
 463           && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
 464               == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
 465 }
 466
 467 /* Loop invariant expression hashtable entry.  */
 468
 469 struct iv_inv_expr_ent
 470 {
 471   /* Tree expression of the entry.  */
 472   tree expr;
 473   /* Unique indentifier.  */
 474   int id;
 475   /* Hash value.  */
 476   hashval_t hash;
 477 };
 478
 479 /* Sort iv_inv_expr_ent pair A and B by id field.  */
 480
 481 static int
 482 sort_iv_inv_expr_ent (const void *a, const void *b)
 483 {
 484   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
 485   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
 486
 487   unsigned id1 = (*e1)->id;
 488   unsigned id2 = (*e2)->id;
 489
 490   if (id1 < id2)
 491     return -1;
 492   else if (id1 > id2)
 493     return 1;
 494   else
 495     return 0;
 496 }
 497
 498 /* Hashtable helpers.  */
 499
 500 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
 501 {
 502   static inline hashval_t hash (const iv_inv_expr_ent *);
 503   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 504 };
 505
 506 /* Hash function for loop invariant expressions.  */
 507
 508 inline hashval_t
 509 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 510 {
 511   return expr->hash;
 512 }
 513
 514 /* Hash table equality function for expressions.  */
 515
 516 inline bool
 517 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 518                            const iv_inv_expr_ent *expr2)
 519 {
 520   return expr1->hash == expr2->hash
 521          && operand_equal_p (expr1->expr, expr2->expr, 0);
 522 }
 523
 524 struct ivopts_data
 525 {
 526   /* The currently optimized loop.  */
 527   struct loop *current_loop;
 528   source_location loop_loc;
 529
 530   /* Numbers of iterations for all exits of the current loop.  */
 531   hash_map<edge, tree_niter_desc *> *niters;
 532
 533   /* Number of registers used in it.  */
 534   unsigned regs_used;
 535
 536   /* The size of version_info array allocated.  */
 537   unsigned version_info_size;
 538
 539   /* The array of information for the ssa names.  */
 540   struct version_info *version_info;
 541
 542   /* The hashtable of loop invariant expressions created
 543      by ivopt.  */
 544   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 545
 546   /* The bitmap of indices in version_info whose value was changed.  */
 547   bitmap relevant;
 548
 549   /* The uses of induction variables.  */
 550   vec<iv_group *> vgroups;
 551
 552   /* The candidates.  */
 553   vec<iv_cand *> vcands;
 554
 555   /* A bitmap of important candidates.  */
 556   bitmap important_candidates;
 557
 558   /* Cache used by tree_to_aff_combination_expand.  */
 559   hash_map<tree, name_expansion *> *name_expansion_cache;
 560
 561   /* The hashtable of common candidates derived from iv uses.  */
 562   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
 563
 564   /* The common candidates.  */
 565   vec<iv_common_cand *> iv_common_cands;
 566
 567   /* The maximum invariant variable id.  */
 568   unsigned max_inv_var_id;
 569
 570   /* The maximum invariant expression id.  */
 571   unsigned max_inv_expr_id;
 572
 573   /* Number of no_overflow BIVs which are not used in memory address.  */
 574   unsigned bivs_not_used_in_addr;
 575
 576   /* Obstack for iv structure.  */
 577   struct obstack iv_obstack;
 578
 579   /* Whether to consider just related and important candidates when replacing a
 580      use.  */
 581   bool consider_all_candidates;
 582
 583   /* Are we optimizing for speed?  */
 584   bool speed;
 585
 586   /* Whether the loop body includes any function calls.  */
 587   bool body_includes_call;
 588
 589   /* Whether the loop body can only be exited via single exit.  */
 590   bool loop_single_exit_p;
 591 };
 592
 593 /* An assignment of iv candidates to uses.  */
 594
 595 struct iv_ca
 596 {
 597   /* The number of uses covered by the assignment.  */
 598   unsigned upto;
 599
 600   /* Number of uses that cannot be expressed by the candidates in the set.  */
 601   unsigned bad_groups;
 602
 603   /* Candidate assigned to a use, together with the related costs.  */
 604   struct cost_pair **cand_for_group;
 605
 606   /* Number of times each candidate is used.  */
 607   unsigned *n_cand_uses;
 608
 609   /* The candidates used.  */
 610   bitmap cands;
 611
 612   /* The number of candidates in the set.  */
 613   unsigned n_cands;
 614
 615   /* The number of invariants needed, including both invariant variants and
 616      invariant expressions.  */
 617   unsigned n_invs;
 618
 619   /* Total cost of expressing uses.  */
 620   comp_cost cand_use_cost;
 621
 622   /* Total cost of candidates.  */
 623   unsigned cand_cost;
 624
 625   /* Number of times each invariant variable is used.  */
 626   unsigned *n_inv_var_uses;
 627
 628   /* Number of times each invariant expression is used.  */
 629   unsigned *n_inv_expr_uses;
 630
 631   /* Total cost of the assignment.  */
 632   comp_cost cost;
 633 };
 634
 635 /* Difference of two iv candidate assignments.  */
 636
 637 struct iv_ca_delta
 638 {
 639   /* Changed group.  */
 640   struct iv_group *group;
 641
 642   /* An old assignment (for rollback purposes).  */
 643   struct cost_pair *old_cp;
 644
 645   /* A new assignment.  */
 646   struct cost_pair *new_cp;
 647
 648   /* Next change in the list.  */
 649   struct iv_ca_delta *next;
 650 };
 651
 652 /* Bound on number of candidates below that all candidates are considered.  */
 653
 654 #define CONSIDER_ALL_CANDIDATES_BOUND \
 655   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 656
 657 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 658    optimizing such a loop would help, and it would take ages).  */
 659
 660 #define MAX_CONSIDERED_GROUPS \
 661   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 662
 663 /* If there are at most this number of ivs in the set, try removing unnecessary
 664    ivs from the set always.  */
 665
 666 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 667   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 668
 669 /* The list of trees for that the decl_rtl field must be reset is stored
 670    here.  */
 671
 672 static vec<tree> decl_rtl_to_reset;
 673
 674 static comp_cost force_expr_to_var_cost (tree, bool);
 675
 676 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 677
 678 edge
 679 single_dom_exit (struct loop *loop)
 680 {
 681   edge exit = single_exit (loop);
 682
 683   if (!exit)
 684     return NULL;
 685
 686   if (!just_once_each_iteration_p (loop, exit->src))
 687     return NULL;
 688
 689   return exit;
 690 }
 691
 692 /* Dumps information about the induction variable IV to FILE.  Don't dump
 693    variable's name if DUMP_NAME is FALSE.  The information is dumped with
 694    preceding spaces indicated by INDENT_LEVEL.  */
 695
 696 void
 697 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
 698 {
 699   const char *p;
 700   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
 701
 702   if (indent_level > 4)
 703     indent_level = 4;
 704   p = spaces + 8 - (indent_level << 1);
 705
 706   fprintf (file, "%sIV struct:\n", p);
 707   if (iv->ssa_name && dump_name)
 708     {
 709       fprintf (file, "%s  SSA_NAME:\t", p);
 710       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 711       fprintf (file, "\n");
 712     }
 713
 714   fprintf (file, "%s  Type:\t", p);
 715   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 716   fprintf (file, "\n");
 717
 718   fprintf (file, "%s  Base:\t", p);
 719   print_generic_expr (file, iv->base, TDF_SLIM);
 720   fprintf (file, "\n");
 721
 722   fprintf (file, "%s  Step:\t", p);
 723   print_generic_expr (file, iv->step, TDF_SLIM);
 724   fprintf (file, "\n");
 725
 726   if (iv->base_object)
 727     {
 728       fprintf (file, "%s  Object:\t", p);
 729       print_generic_expr (file, iv->base_object, TDF_SLIM);
 730       fprintf (file, "\n");
 731     }
 732
 733   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
 734
 735   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
 736            p, iv->no_overflow ? "No-overflow" : "Overflow");
 737 }
 738
 739 /* Dumps information about the USE to FILE.  */
 740
 741 void
 742 dump_use (FILE *file, struct iv_use *use)
 743 {
 744   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
 745   fprintf (file, "    At stmt:\t");
 746   print_gimple_stmt (file, use->stmt, 0, 0);
 747   fprintf (file, "    At pos:\t");
 748   if (use->op_p)
 749     print_generic_expr (file, *use->op_p, TDF_SLIM);
 750   fprintf (file, "\n");
 751   dump_iv (file, use->iv, false, 2);
 752 }
 753
 754 /* Dumps information about the uses to FILE.  */
 755
 756 void
 757 dump_groups (FILE *file, struct ivopts_data *data)
 758 {
 759   unsigned i, j;
 760   struct iv_group *group;
 761
 762   for (i = 0; i < data->vgroups.length (); i++)
 763     {
 764       group = data->vgroups[i];
 765       fprintf (file, "Group %d:\n", group->id);
 766       if (group->type == USE_NONLINEAR_EXPR)
 767         fprintf (file, "  Type:\tGENERIC\n");
 768       else if (group->type == USE_ADDRESS)
 769         fprintf (file, "  Type:\tADDRESS\n");
 770       else
 771         {
 772           gcc_assert (group->type == USE_COMPARE);
 773           fprintf (file, "  Type:\tCOMPARE\n");
 774         }
 775       for (j = 0; j < group->vuses.length (); j++)
 776         dump_use (file, group->vuses[j]);
 777     }
 778 }
 779
 780 /* Dumps information about induction variable candidate CAND to FILE.  */
 781
 782 void
 783 dump_cand (FILE *file, struct iv_cand *cand)
 784 {
 785   struct iv *iv = cand->iv;
 786
 787   fprintf (file, "Candidate %d:\n", cand->id);
 788   if (cand->inv_vars)
 789     {
 790       fprintf (file, "  Depend on inv.vars: ");
 791       dump_bitmap (file, cand->inv_vars);
 792     }
 793
 794   if (cand->var_before)
 795     {
 796       fprintf (file, "  Var befor: ");
 797       print_generic_expr (file, cand->var_before, TDF_SLIM);
 798       fprintf (file, "\n");
 799     }
 800   if (cand->var_after)
 801     {
 802       fprintf (file, "  Var after: ");
 803       print_generic_expr (file, cand->var_after, TDF_SLIM);
 804       fprintf (file, "\n");
 805     }
 806
 807   switch (cand->pos)
 808     {
 809     case IP_NORMAL:
 810       fprintf (file, "  Incr POS: before exit test\n");
 811       break;
 812
 813     case IP_BEFORE_USE:
 814       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
 815       break;
 816
 817     case IP_AFTER_USE:
 818       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
 819       break;
 820
 821     case IP_END:
 822       fprintf (file, "  Incr POS: at end\n");
 823       break;
 824
 825     case IP_ORIGINAL:
 826       fprintf (file, "  Incr POS: orig biv\n");
 827       break;
 828     }
 829
 830   dump_iv (file, iv, false, 1);
 831 }
 832
 833 /* Returns the info for ssa version VER.  */
 834
 835 static inline struct version_info *
 836 ver_info (struct ivopts_data *data, unsigned ver)
 837 {
 838   return data->version_info + ver;
 839 }
 840
 841 /* Returns the info for ssa name NAME.  */
 842
 843 static inline struct version_info *
 844 name_info (struct ivopts_data *data, tree name)
 845 {
 846   return ver_info (data, SSA_NAME_VERSION (name));
 847 }
 848
 849 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 850    emitted in LOOP.  */
 851
 852 static bool
 853 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
 854 {
 855   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 856
 857   gcc_assert (bb);
 858
 859   if (sbb == loop->latch)
 860     return true;
 861
 862   if (sbb != bb)
 863     return false;
 864
 865   return stmt == last_stmt (bb);
 866 }
 867
 868 /* Returns true if STMT if after the place where the original induction
 869    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 870    if the positions are identical.  */
 871
 872 static bool
 873 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
 874 {
 875   basic_block cand_bb = gimple_bb (cand->incremented_at);
 876   basic_block stmt_bb = gimple_bb (stmt);
 877
 878   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 879     return false;
 880
 881   if (stmt_bb != cand_bb)
 882     return true;
 883
 884   if (true_if_equal
 885       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 886     return true;
 887   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 888 }
 889
 890 /* Returns true if STMT if after the place where the induction variable
 891    CAND is incremented in LOOP.  */
 892
 893 static bool
 894 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
 895 {
 896   switch (cand->pos)
 897     {
 898     case IP_END:
 899       return false;
 900
 901     case IP_NORMAL:
 902       return stmt_after_ip_normal_pos (loop, stmt);
 903
 904     case IP_ORIGINAL:
 905     case IP_AFTER_USE:
 906       return stmt_after_inc_pos (cand, stmt, false);
 907
 908     case IP_BEFORE_USE:
 909       return stmt_after_inc_pos (cand, stmt, true);
 910
 911     default:
 912       gcc_unreachable ();
 913     }
 914 }
 915
 916 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 917
 918 static bool
 919 abnormal_ssa_name_p (tree exp)
 920 {
 921   if (!exp)
 922     return false;
 923
 924   if (TREE_CODE (exp) != SSA_NAME)
 925     return false;
 926
 927   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 928 }
 929
 930 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 931    abnormal phi node.  Callback for for_each_index.  */
 932
 933 static bool
 934 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 935                                   void *data ATTRIBUTE_UNUSED)
 936 {
 937   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 938     {
 939       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 940         return false;
 941       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 942         return false;
 943     }
 944
 945   return !abnormal_ssa_name_p (*index);
 946 }
 947
 948 /* Returns true if EXPR contains a ssa name that occurs in an
 949    abnormal phi node.  */
 950
 951 bool
 952 contains_abnormal_ssa_name_p (tree expr)
 953 {
 954   enum tree_code code;
 955   enum tree_code_class codeclass;
 956
 957   if (!expr)
 958     return false;
 959
 960   code = TREE_CODE (expr);
 961   codeclass = TREE_CODE_CLASS (code);
 962
 963   if (code == SSA_NAME)
 964     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 965
 966   if (code == INTEGER_CST
 967       || is_gimple_min_invariant (expr))
 968     return false;
 969
 970   if (code == ADDR_EXPR)
 971     return !for_each_index (&TREE_OPERAND (expr, 0),
 972                             idx_contains_abnormal_ssa_name_p,
 973                             NULL);
 974
 975   if (code == COND_EXPR)
 976     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 977       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 978       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 979
 980   switch (codeclass)
 981     {
 982     case tcc_binary:
 983     case tcc_comparison:
 984       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 985         return true;
 986
 987       /* Fallthru.  */
 988     case tcc_unary:
 989       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 990         return true;
 991
 992       break;
 993
 994     default:
 995       gcc_unreachable ();
 996     }
 997
 998   return false;
 999 }
1000
1001 /*  Returns the structure describing number of iterations determined from
1002     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1003
1004 static struct tree_niter_desc *
1005 niter_for_exit (struct ivopts_data *data, edge exit)
1006 {
1007   struct tree_niter_desc *desc;
1008   tree_niter_desc **slot;
1009
1010   if (!data->niters)
1011     {
1012       data->niters = new hash_map<edge, tree_niter_desc *>;
1013       slot = NULL;
1014     }
1015   else
1016     slot = data->niters->get (exit);
1017
1018   if (!slot)
1019     {
1020       /* Try to determine number of iterations.  We cannot safely work with ssa
1021          names that appear in phi nodes on abnormal edges, so that we do not
1022          create overlapping life ranges for them (PR 27283).  */
1023       desc = XNEW (struct tree_niter_desc);
1024       if (!number_of_iterations_exit (data->current_loop,
1025                                       exit, desc, true)
1026           || contains_abnormal_ssa_name_p (desc->niter))
1027         {
1028           XDELETE (desc);
1029           desc = NULL;
1030         }
1031       data->niters->put (exit, desc);
1032     }
1033   else
1034     desc = *slot;
1035
1036   return desc;
1037 }
1038
1039 /* Returns the structure describing number of iterations determined from
1040    single dominating exit of DATA->current_loop, or NULL if something
1041    goes wrong.  */
1042
1043 static struct tree_niter_desc *
1044 niter_for_single_dom_exit (struct ivopts_data *data)
1045 {
1046   edge exit = single_dom_exit (data->current_loop);
1047
1048   if (!exit)
1049     return NULL;
1050
1051   return niter_for_exit (data, exit);
1052 }
1053
1054 /* Initializes data structures used by the iv optimization pass, stored
1055    in DATA.  */
1056
1057 static void
1058 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1059 {
1060   data->version_info_size = 2 * num_ssa_names;
1061   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1062   data->relevant = BITMAP_ALLOC (NULL);
1063   data->important_candidates = BITMAP_ALLOC (NULL);
1064   data->max_inv_var_id = 0;
1065   data->max_inv_expr_id = 0;
1066   data->niters = NULL;
1067   data->vgroups.create (20);
1068   data->vcands.create (20);
1069   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1070   data->name_expansion_cache = NULL;
1071   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1072   data->iv_common_cands.create (20);
1073   decl_rtl_to_reset.create (20);
1074   gcc_obstack_init (&data->iv_obstack);
1075 }
1076
1077 /* Returns a memory object to that EXPR points.  In case we are able to
1078    determine that it does not point to any such object, NULL is returned.  */
1079
1080 static tree
1081 determine_base_object (tree expr)
1082 {
1083   enum tree_code code = TREE_CODE (expr);
1084   tree base, obj;
1085
1086   /* If this is a pointer casted to any type, we need to determine
1087      the base object for the pointer; so handle conversions before
1088      throwing away non-pointer expressions.  */
1089   if (CONVERT_EXPR_P (expr))
1090     return determine_base_object (TREE_OPERAND (expr, 0));
1091
1092   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1093     return NULL_TREE;
1094
1095   switch (code)
1096     {
1097     case INTEGER_CST:
1098       return NULL_TREE;
1099
1100     case ADDR_EXPR:
1101       obj = TREE_OPERAND (expr, 0);
1102       base = get_base_address (obj);
1103
1104       if (!base)
1105         return expr;
1106
1107       if (TREE_CODE (base) == MEM_REF)
1108         return determine_base_object (TREE_OPERAND (base, 0));
1109
1110       return fold_convert (ptr_type_node,
1111                            build_fold_addr_expr (base));
1112
1113     case POINTER_PLUS_EXPR:
1114       return determine_base_object (TREE_OPERAND (expr, 0));
1115
1116     case PLUS_EXPR:
1117     case MINUS_EXPR:
1118       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
1119       gcc_unreachable ();
1120
1121     default:
1122       return fold_convert (ptr_type_node, expr);
1123     }
1124 }
1125
1126 /* Return true if address expression with non-DECL_P operand appears
1127    in EXPR.  */
1128
1129 static bool
1130 contain_complex_addr_expr (tree expr)
1131 {
1132   bool res = false;
1133
1134   STRIP_NOPS (expr);
1135   switch (TREE_CODE (expr))
1136     {
1137     case POINTER_PLUS_EXPR:
1138     case PLUS_EXPR:
1139     case MINUS_EXPR:
1140       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1141       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1142       break;
1143
1144     case ADDR_EXPR:
1145       return (!DECL_P (TREE_OPERAND (expr, 0)));
1146
1147     default:
1148       return false;
1149     }
1150
1151   return res;
1152 }
1153
1154 /* Allocates an induction variable with given initial value BASE and step STEP
1155    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1156
1157 static struct iv *
1158 alloc_iv (struct ivopts_data *data, tree base, tree step,
1159           bool no_overflow = false)
1160 {
1161   tree expr = base;
1162   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1163                                               sizeof (struct iv));
1164   gcc_assert (step != NULL_TREE);
1165
1166   /* Lower address expression in base except ones with DECL_P as operand.
1167      By doing this:
1168        1) More accurate cost can be computed for address expressions;
1169        2) Duplicate candidates won't be created for bases in different
1170           forms, like &a[0] and &a.  */
1171   STRIP_NOPS (expr);
1172   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1173       || contain_complex_addr_expr (expr))
1174     {
1175       aff_tree comb;
1176       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1177       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1178     }
1179
1180   iv->base = base;
1181   iv->base_object = determine_base_object (base);
1182   iv->step = step;
1183   iv->biv_p = false;
1184   iv->nonlin_use = NULL;
1185   iv->ssa_name = NULL_TREE;
1186   if (!no_overflow
1187        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1188                               base, step))
1189     no_overflow = true;
1190   iv->no_overflow = no_overflow;
1191   iv->have_address_use = false;
1192
1193   return iv;
1194 }
1195
1196 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1197    doesn't overflow.  */
1198
1199 static void
1200 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1201         bool no_overflow)
1202 {
1203   struct version_info *info = name_info (data, iv);
1204
1205   gcc_assert (!info->iv);
1206
1207   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1208   info->iv = alloc_iv (data, base, step, no_overflow);
1209   info->iv->ssa_name = iv;
1210 }
1211
1212 /* Finds induction variable declaration for VAR.  */
1213
1214 static struct iv *
1215 get_iv (struct ivopts_data *data, tree var)
1216 {
1217   basic_block bb;
1218   tree type = TREE_TYPE (var);
1219
1220   if (!POINTER_TYPE_P (type)
1221       && !INTEGRAL_TYPE_P (type))
1222     return NULL;
1223
1224   if (!name_info (data, var)->iv)
1225     {
1226       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1227
1228       if (!bb
1229           || !flow_bb_inside_loop_p (data->current_loop, bb))
1230         set_iv (data, var, var, build_int_cst (type, 0), true);
1231     }
1232
1233   return name_info (data, var)->iv;
1234 }
1235
1236 /* Return the first non-invariant ssa var found in EXPR.  */
1237
1238 static tree
1239 extract_single_var_from_expr (tree expr)
1240 {
1241   int i, n;
1242   tree tmp;
1243   enum tree_code code;
1244
1245   if (!expr || is_gimple_min_invariant (expr))
1246     return NULL;
1247
1248   code = TREE_CODE (expr);
1249   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1250     {
1251       n = TREE_OPERAND_LENGTH (expr);
1252       for (i = 0; i < n; i++)
1253         {
1254           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1255
1256           if (tmp)
1257             return tmp;
1258         }
1259     }
1260   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1261 }
1262
1263 /* Finds basic ivs.  */
1264
1265 static bool
1266 find_bivs (struct ivopts_data *data)
1267 {
1268   gphi *phi;
1269   affine_iv iv;
1270   tree step, type, base, stop;
1271   bool found = false;
1272   struct loop *loop = data->current_loop;
1273   gphi_iterator psi;
1274
1275   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1276     {
1277       phi = psi.phi ();
1278
1279       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1280         continue;
1281
1282       if (virtual_operand_p (PHI_RESULT (phi)))
1283         continue;
1284
1285       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1286         continue;
1287
1288       if (integer_zerop (iv.step))
1289         continue;
1290
1291       step = iv.step;
1292       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1293       /* Stop expanding iv base at the first ssa var referred by iv step.
1294          Ideally we should stop at any ssa var, because that's expensive
1295          and unusual to happen, we just do it on the first one.
1296
1297          See PR64705 for the rationale.  */
1298       stop = extract_single_var_from_expr (step);
1299       base = expand_simple_operations (base, stop);
1300       if (contains_abnormal_ssa_name_p (base)
1301           || contains_abnormal_ssa_name_p (step))
1302         continue;
1303
1304       type = TREE_TYPE (PHI_RESULT (phi));
1305       base = fold_convert (type, base);
1306       if (step)
1307         {
1308           if (POINTER_TYPE_P (type))
1309             step = convert_to_ptrofftype (step);
1310           else
1311             step = fold_convert (type, step);
1312         }
1313
1314       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1315       found = true;
1316     }
1317
1318   return found;
1319 }
1320
1321 /* Marks basic ivs.  */
1322
1323 static void
1324 mark_bivs (struct ivopts_data *data)
1325 {
1326   gphi *phi;
1327   gimple *def;
1328   tree var;
1329   struct iv *iv, *incr_iv;
1330   struct loop *loop = data->current_loop;
1331   basic_block incr_bb;
1332   gphi_iterator psi;
1333
1334   data->bivs_not_used_in_addr = 0;
1335   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1336     {
1337       phi = psi.phi ();
1338
1339       iv = get_iv (data, PHI_RESULT (phi));
1340       if (!iv)
1341         continue;
1342
1343       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1344       def = SSA_NAME_DEF_STMT (var);
1345       /* Don't mark iv peeled from other one as biv.  */
1346       if (def
1347           && gimple_code (def) == GIMPLE_PHI
1348           && gimple_bb (def) == loop->header)
1349         continue;
1350
1351       incr_iv = get_iv (data, var);
1352       if (!incr_iv)
1353         continue;
1354
1355       /* If the increment is in the subloop, ignore it.  */
1356       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1357       if (incr_bb->loop_father != data->current_loop
1358           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1359         continue;
1360
1361       iv->biv_p = true;
1362       incr_iv->biv_p = true;
1363       if (iv->no_overflow)
1364         data->bivs_not_used_in_addr++;
1365       if (incr_iv->no_overflow)
1366         data->bivs_not_used_in_addr++;
1367     }
1368 }
1369
1370 /* Checks whether STMT defines a linear induction variable and stores its
1371    parameters to IV.  */
1372
1373 static bool
1374 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1375 {
1376   tree lhs, stop;
1377   struct loop *loop = data->current_loop;
1378
1379   iv->base = NULL_TREE;
1380   iv->step = NULL_TREE;
1381
1382   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1383     return false;
1384
1385   lhs = gimple_assign_lhs (stmt);
1386   if (TREE_CODE (lhs) != SSA_NAME)
1387     return false;
1388
1389   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1390     return false;
1391
1392   /* Stop expanding iv base at the first ssa var referred by iv step.
1393      Ideally we should stop at any ssa var, because that's expensive
1394      and unusual to happen, we just do it on the first one.
1395
1396      See PR64705 for the rationale.  */
1397   stop = extract_single_var_from_expr (iv->step);
1398   iv->base = expand_simple_operations (iv->base, stop);
1399   if (contains_abnormal_ssa_name_p (iv->base)
1400       || contains_abnormal_ssa_name_p (iv->step))
1401     return false;
1402
1403   /* If STMT could throw, then do not consider STMT as defining a GIV.
1404      While this will suppress optimizations, we can not safely delete this
1405      GIV and associated statements, even if it appears it is not used.  */
1406   if (stmt_could_throw_p (stmt))
1407     return false;
1408
1409   return true;
1410 }
1411
1412 /* Finds general ivs in statement STMT.  */
1413
1414 static void
1415 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1416 {
1417   affine_iv iv;
1418
1419   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1420     return;
1421
1422   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1423 }
1424
1425 /* Finds general ivs in basic block BB.  */
1426
1427 static void
1428 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1429 {
1430   gimple_stmt_iterator bsi;
1431
1432   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1433     find_givs_in_stmt (data, gsi_stmt (bsi));
1434 }
1435
1436 /* Finds general ivs.  */
1437
1438 static void
1439 find_givs (struct ivopts_data *data)
1440 {
1441   struct loop *loop = data->current_loop;
1442   basic_block *body = get_loop_body_in_dom_order (loop);
1443   unsigned i;
1444
1445   for (i = 0; i < loop->num_nodes; i++)
1446     find_givs_in_bb (data, body[i]);
1447   free (body);
1448 }
1449
1450 /* For each ssa name defined in LOOP determines whether it is an induction
1451    variable and if so, its initial value and step.  */
1452
1453 static bool
1454 find_induction_variables (struct ivopts_data *data)
1455 {
1456   unsigned i;
1457   bitmap_iterator bi;
1458
1459   if (!find_bivs (data))
1460     return false;
1461
1462   find_givs (data);
1463   mark_bivs (data);
1464
1465   if (dump_file && (dump_flags & TDF_DETAILS))
1466     {
1467       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1468
1469       if (niter)
1470         {
1471           fprintf (dump_file, "  number of iterations ");
1472           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1473           if (!integer_zerop (niter->may_be_zero))
1474             {
1475               fprintf (dump_file, "; zero if ");
1476               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1477             }
1478           fprintf (dump_file, "\n");
1479         };
1480
1481       fprintf (dump_file, "\n<Induction Vars>:\n");
1482       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1483         {
1484           struct version_info *info = ver_info (data, i);
1485           if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1486             dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1487         }
1488     }
1489
1490   return true;
1491 }
1492
1493 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1494    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1495    is the const offset stripped from IV base; for other types use, both
1496    are zero by default.  */
1497
1498 static struct iv_use *
1499 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1500             gimple *stmt, enum use_type type, tree addr_base,
1501             unsigned HOST_WIDE_INT addr_offset)
1502 {
1503   struct iv_use *use = XCNEW (struct iv_use);
1504
1505   use->id = group->vuses.length ();
1506   use->group_id = group->id;
1507   use->type = type;
1508   use->iv = iv;
1509   use->stmt = stmt;
1510   use->op_p = use_p;
1511   use->addr_base = addr_base;
1512   use->addr_offset = addr_offset;
1513
1514   group->vuses.safe_push (use);
1515   return use;
1516 }
1517
1518 /* Checks whether OP is a loop-level invariant and if so, records it.
1519    NONLINEAR_USE is true if the invariant is used in a way we do not
1520    handle specially.  */
1521
1522 static void
1523 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1524 {
1525   basic_block bb;
1526   struct version_info *info;
1527
1528   if (TREE_CODE (op) != SSA_NAME
1529       || virtual_operand_p (op))
1530     return;
1531
1532   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1533   if (bb
1534       && flow_bb_inside_loop_p (data->current_loop, bb))
1535     return;
1536
1537   info = name_info (data, op);
1538   info->name = op;
1539   info->has_nonlin_use |= nonlinear_use;
1540   if (!info->inv_id)
1541     info->inv_id = ++data->max_inv_var_id;
1542   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1543 }
1544
1545 static tree
1546 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1547
1548 /* Record a group of TYPE.  */
1549
1550 static struct iv_group *
1551 record_group (struct ivopts_data *data, enum use_type type)
1552 {
1553   struct iv_group *group = XCNEW (struct iv_group);
1554
1555   group->id = data->vgroups.length ();
1556   group->type = type;
1557   group->related_cands = BITMAP_ALLOC (NULL);
1558   group->vuses.create (1);
1559
1560   data->vgroups.safe_push (group);
1561   return group;
1562 }
1563
1564 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1565    New group will be created if there is no existing group for the use.  */
1566
1567 static struct iv_use *
1568 record_group_use (struct ivopts_data *data, tree *use_p,
1569                   struct iv *iv, gimple *stmt, enum use_type type)
1570 {
1571   tree addr_base = NULL;
1572   struct iv_group *group = NULL;
1573   unsigned HOST_WIDE_INT addr_offset = 0;
1574
1575   /* Record non address type use in a new group.  */
1576   if (type == USE_ADDRESS && iv->base_object)
1577     {
1578       unsigned int i;
1579
1580       addr_base = strip_offset (iv->base, &addr_offset);
1581       for (i = 0; i < data->vgroups.length (); i++)
1582         {
1583           struct iv_use *use;
1584
1585           group = data->vgroups[i];
1586           use = group->vuses[0];
1587           if (use->type != USE_ADDRESS || !use->iv->base_object)
1588             continue;
1589
1590           /* Check if it has the same stripped base and step.  */
1591           if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1592               && operand_equal_p (iv->step, use->iv->step, 0)
1593               && operand_equal_p (addr_base, use->addr_base, 0))
1594             break;
1595         }
1596       if (i == data->vgroups.length ())
1597         group = NULL;
1598     }
1599
1600   if (!group)
1601     group = record_group (data, type);
1602
1603   return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
1604 }
1605
1606 /* Checks whether the use OP is interesting and if so, records it.  */
1607
1608 static struct iv_use *
1609 find_interesting_uses_op (struct ivopts_data *data, tree op)
1610 {
1611   struct iv *iv;
1612   gimple *stmt;
1613   struct iv_use *use;
1614
1615   if (TREE_CODE (op) != SSA_NAME)
1616     return NULL;
1617
1618   iv = get_iv (data, op);
1619   if (!iv)
1620     return NULL;
1621
1622   if (iv->nonlin_use)
1623     {
1624       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1625       return iv->nonlin_use;
1626     }
1627
1628   if (integer_zerop (iv->step))
1629     {
1630       record_invariant (data, op, true);
1631       return NULL;
1632     }
1633
1634   stmt = SSA_NAME_DEF_STMT (op);
1635   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1636
1637   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1638   iv->nonlin_use = use;
1639   return use;
1640 }
1641
1642 /* Given a condition in statement STMT, checks whether it is a compare
1643    of an induction variable and an invariant.  If this is the case,
1644    CONTROL_VAR is set to location of the iv, BOUND to the location of
1645    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1646    induction variable descriptions, and true is returned.  If this is not
1647    the case, CONTROL_VAR and BOUND are set to the arguments of the
1648    condition and false is returned.  */
1649
1650 static bool
1651 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1652                        tree **control_var, tree **bound,
1653                        struct iv **iv_var, struct iv **iv_bound)
1654 {
1655   /* The objects returned when COND has constant operands.  */
1656   static struct iv const_iv;
1657   static tree zero;
1658   tree *op0 = &zero, *op1 = &zero;
1659   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1660   bool ret = false;
1661
1662   if (gimple_code (stmt) == GIMPLE_COND)
1663     {
1664       gcond *cond_stmt = as_a <gcond *> (stmt);
1665       op0 = gimple_cond_lhs_ptr (cond_stmt);
1666       op1 = gimple_cond_rhs_ptr (cond_stmt);
1667     }
1668   else
1669     {
1670       op0 = gimple_assign_rhs1_ptr (stmt);
1671       op1 = gimple_assign_rhs2_ptr (stmt);
1672     }
1673
1674   zero = integer_zero_node;
1675   const_iv.step = integer_zero_node;
1676
1677   if (TREE_CODE (*op0) == SSA_NAME)
1678     iv0 = get_iv (data, *op0);
1679   if (TREE_CODE (*op1) == SSA_NAME)
1680     iv1 = get_iv (data, *op1);
1681
1682   /* Exactly one of the compared values must be an iv, and the other one must
1683      be an invariant.  */
1684   if (!iv0 || !iv1)
1685     goto end;
1686
1687   if (integer_zerop (iv0->step))
1688     {
1689       /* Control variable may be on the other side.  */
1690       std::swap (op0, op1);
1691       std::swap (iv0, iv1);
1692     }
1693   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1694
1695 end:
1696   if (control_var)
1697     *control_var = op0;
1698   if (iv_var)
1699     *iv_var = iv0;
1700   if (bound)
1701     *bound = op1;
1702   if (iv_bound)
1703     *iv_bound = iv1;
1704
1705   return ret;
1706 }
1707
1708 /* Checks whether the condition in STMT is interesting and if so,
1709    records it.  */
1710
1711 static void
1712 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1713 {
1714   tree *var_p, *bound_p;
1715   struct iv *var_iv;
1716
1717   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1718     {
1719       find_interesting_uses_op (data, *var_p);
1720       find_interesting_uses_op (data, *bound_p);
1721       return;
1722     }
1723
1724   record_group_use (data, NULL, var_iv, stmt, USE_COMPARE);
1725 }
1726
1727 /* Returns the outermost loop EXPR is obviously invariant in
1728    relative to the loop LOOP, i.e. if all its operands are defined
1729    outside of the returned loop.  Returns NULL if EXPR is not
1730    even obviously invariant in LOOP.  */
1731
1732 struct loop *
1733 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1734 {
1735   basic_block def_bb;
1736   unsigned i, len;
1737
1738   if (is_gimple_min_invariant (expr))
1739     return current_loops->tree_root;
1740
1741   if (TREE_CODE (expr) == SSA_NAME)
1742     {
1743       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1744       if (def_bb)
1745         {
1746           if (flow_bb_inside_loop_p (loop, def_bb))
1747             return NULL;
1748           return superloop_at_depth (loop,
1749                                      loop_depth (def_bb->loop_father) + 1);
1750         }
1751
1752       return current_loops->tree_root;
1753     }
1754
1755   if (!EXPR_P (expr))
1756     return NULL;
1757
1758   unsigned maxdepth = 0;
1759   len = TREE_OPERAND_LENGTH (expr);
1760   for (i = 0; i < len; i++)
1761     {
1762       struct loop *ivloop;
1763       if (!TREE_OPERAND (expr, i))
1764         continue;
1765
1766       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1767       if (!ivloop)
1768         return NULL;
1769       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1770     }
1771
1772   return superloop_at_depth (loop, maxdepth);
1773 }
1774
1775 /* Returns true if expression EXPR is obviously invariant in LOOP,
1776    i.e. if all its operands are defined outside of the LOOP.  LOOP
1777    should not be the function body.  */
1778
1779 bool
1780 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1781 {
1782   basic_block def_bb;
1783   unsigned i, len;
1784
1785   gcc_assert (loop_depth (loop) > 0);
1786
1787   if (is_gimple_min_invariant (expr))
1788     return true;
1789
1790   if (TREE_CODE (expr) == SSA_NAME)
1791     {
1792       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1793       if (def_bb
1794           && flow_bb_inside_loop_p (loop, def_bb))
1795         return false;
1796
1797       return true;
1798     }
1799
1800   if (!EXPR_P (expr))
1801     return false;
1802
1803   len = TREE_OPERAND_LENGTH (expr);
1804   for (i = 0; i < len; i++)
1805     if (TREE_OPERAND (expr, i)
1806         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1807       return false;
1808
1809   return true;
1810 }
1811
1812 /* Given expression EXPR which computes inductive values with respect
1813    to loop recorded in DATA, this function returns biv from which EXPR
1814    is derived by tracing definition chains of ssa variables in EXPR.  */
1815
1816 static struct iv*
1817 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1818 {
1819   struct iv *iv;
1820   unsigned i, n;
1821   tree e2, e1;
1822   enum tree_code code;
1823   gimple *stmt;
1824
1825   if (expr == NULL_TREE)
1826     return NULL;
1827
1828   if (is_gimple_min_invariant (expr))
1829     return NULL;
1830
1831   code = TREE_CODE (expr);
1832   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1833     {
1834       n = TREE_OPERAND_LENGTH (expr);
1835       for (i = 0; i < n; i++)
1836         {
1837           iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1838           if (iv)
1839             return iv;
1840         }
1841     }
1842
1843   /* Stop if it's not ssa name.  */
1844   if (code != SSA_NAME)
1845     return NULL;
1846
1847   iv = get_iv (data, expr);
1848   if (!iv || integer_zerop (iv->step))
1849     return NULL;
1850   else if (iv->biv_p)
1851     return iv;
1852
1853   stmt = SSA_NAME_DEF_STMT (expr);
1854   if (gphi *phi = dyn_cast <gphi *> (stmt))
1855     {
1856       ssa_op_iter iter;
1857       use_operand_p use_p;
1858       basic_block phi_bb = gimple_bb (phi);
1859
1860       /* Skip loop header PHI that doesn't define biv.  */
1861       if (phi_bb->loop_father == data->current_loop)
1862         return NULL;
1863
1864       if (virtual_operand_p (gimple_phi_result (phi)))
1865         return NULL;
1866
1867       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1868         {
1869           tree use = USE_FROM_PTR (use_p);
1870           iv = find_deriving_biv_for_expr (data, use);
1871           if (iv)
1872             return iv;
1873         }
1874       return NULL;
1875     }
1876   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1877     return NULL;
1878
1879   e1 = gimple_assign_rhs1 (stmt);
1880   code = gimple_assign_rhs_code (stmt);
1881   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1882     return find_deriving_biv_for_expr (data, e1);
1883
1884   switch (code)
1885     {
1886     case MULT_EXPR:
1887     case PLUS_EXPR:
1888     case MINUS_EXPR:
1889     case POINTER_PLUS_EXPR:
1890       /* Increments, decrements and multiplications by a constant
1891          are simple.  */
1892       e2 = gimple_assign_rhs2 (stmt);
1893       iv = find_deriving_biv_for_expr (data, e2);
1894       if (iv)
1895         return iv;
1896       gcc_fallthrough ();
1897
1898     CASE_CONVERT:
1899       /* Casts are simple.  */
1900       return find_deriving_biv_for_expr (data, e1);
1901
1902     default:
1903       break;
1904     }
1905
1906   return NULL;
1907 }
1908
1909 /* Record BIV, its predecessor and successor that they are used in
1910    address type uses.  */
1911
1912 static void
1913 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1914 {
1915   unsigned i;
1916   tree type, base_1, base_2;
1917   bitmap_iterator bi;
1918
1919   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1920       || biv->have_address_use || !biv->no_overflow)
1921     return;
1922
1923   type = TREE_TYPE (biv->base);
1924   if (!INTEGRAL_TYPE_P (type))
1925     return;
1926
1927   biv->have_address_use = true;
1928   data->bivs_not_used_in_addr--;
1929   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1930   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1931     {
1932       struct iv *iv = ver_info (data, i)->iv;
1933
1934       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1935           || iv->have_address_use || !iv->no_overflow)
1936         continue;
1937
1938       if (type != TREE_TYPE (iv->base)
1939           || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1940         continue;
1941
1942       if (!operand_equal_p (biv->step, iv->step, 0))
1943         continue;
1944
1945       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1946       if (operand_equal_p (base_1, iv->base, 0)
1947           || operand_equal_p (base_2, biv->base, 0))
1948         {
1949           iv->have_address_use = true;
1950           data->bivs_not_used_in_addr--;
1951         }
1952     }
1953 }
1954
1955 /* Cumulates the steps of indices into DATA and replaces their values with the
1956    initial ones.  Returns false when the value of the index cannot be determined.
1957    Callback for for_each_index.  */
1958
1959 struct ifs_ivopts_data
1960 {
1961   struct ivopts_data *ivopts_data;
1962   gimple *stmt;
1963   tree step;
1964 };
1965
1966 static bool
1967 idx_find_step (tree base, tree *idx, void *data)
1968 {
1969   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1970   struct iv *iv;
1971   bool use_overflow_semantics = false;
1972   tree step, iv_base, iv_step, lbound, off;
1973   struct loop *loop = dta->ivopts_data->current_loop;
1974
1975   /* If base is a component ref, require that the offset of the reference
1976      be invariant.  */
1977   if (TREE_CODE (base) == COMPONENT_REF)
1978     {
1979       off = component_ref_field_offset (base);
1980       return expr_invariant_in_loop_p (loop, off);
1981     }
1982
1983   /* If base is array, first check whether we will be able to move the
1984      reference out of the loop (in order to take its address in strength
1985      reduction).  In order for this to work we need both lower bound
1986      and step to be loop invariants.  */
1987   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1988     {
1989       /* Moreover, for a range, the size needs to be invariant as well.  */
1990       if (TREE_CODE (base) == ARRAY_RANGE_REF
1991           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1992         return false;
1993
1994       step = array_ref_element_size (base);
1995       lbound = array_ref_low_bound (base);
1996
1997       if (!expr_invariant_in_loop_p (loop, step)
1998           || !expr_invariant_in_loop_p (loop, lbound))
1999         return false;
2000     }
2001
2002   if (TREE_CODE (*idx) != SSA_NAME)
2003     return true;
2004
2005   iv = get_iv (dta->ivopts_data, *idx);
2006   if (!iv)
2007     return false;
2008
2009   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2010           *&x[0], which is not folded and does not trigger the
2011           ARRAY_REF path below.  */
2012   *idx = iv->base;
2013
2014   if (integer_zerop (iv->step))
2015     return true;
2016
2017   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2018     {
2019       step = array_ref_element_size (base);
2020
2021       /* We only handle addresses whose step is an integer constant.  */
2022       if (TREE_CODE (step) != INTEGER_CST)
2023         return false;
2024     }
2025   else
2026     /* The step for pointer arithmetics already is 1 byte.  */
2027     step = size_one_node;
2028
2029   iv_base = iv->base;
2030   iv_step = iv->step;
2031   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2032     use_overflow_semantics = true;
2033
2034   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2035                             sizetype, &iv_base, &iv_step, dta->stmt,
2036                             use_overflow_semantics))
2037     {
2038       /* The index might wrap.  */
2039       return false;
2040     }
2041
2042   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2043   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2044
2045   if (dta->ivopts_data->bivs_not_used_in_addr)
2046     {
2047       if (!iv->biv_p)
2048         iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2049
2050       record_biv_for_address_use (dta->ivopts_data, iv);
2051     }
2052   return true;
2053 }
2054
2055 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2056    object is passed to it in DATA.  */
2057
2058 static bool
2059 idx_record_use (tree base, tree *idx,
2060                 void *vdata)
2061 {
2062   struct ivopts_data *data = (struct ivopts_data *) vdata;
2063   find_interesting_uses_op (data, *idx);
2064   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2065     {
2066       find_interesting_uses_op (data, array_ref_element_size (base));
2067       find_interesting_uses_op (data, array_ref_low_bound (base));
2068     }
2069   return true;
2070 }
2071
2072 /* If we can prove that TOP = cst * BOT for some constant cst,
2073    store cst to MUL and return true.  Otherwise return false.
2074    The returned value is always sign-extended, regardless of the
2075    signedness of TOP and BOT.  */
2076
2077 static bool
2078 constant_multiple_of (tree top, tree bot, widest_int *mul)
2079 {
2080   tree mby;
2081   enum tree_code code;
2082   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2083   widest_int res, p0, p1;
2084
2085   STRIP_NOPS (top);
2086   STRIP_NOPS (bot);
2087
2088   if (operand_equal_p (top, bot, 0))
2089     {
2090       *mul = 1;
2091       return true;
2092     }
2093
2094   code = TREE_CODE (top);
2095   switch (code)
2096     {
2097     case MULT_EXPR:
2098       mby = TREE_OPERAND (top, 1);
2099       if (TREE_CODE (mby) != INTEGER_CST)
2100         return false;
2101
2102       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2103         return false;
2104
2105       *mul = wi::sext (res * wi::to_widest (mby), precision);
2106       return true;
2107
2108     case PLUS_EXPR:
2109     case MINUS_EXPR:
2110       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2111           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2112         return false;
2113
2114       if (code == MINUS_EXPR)
2115         p1 = -p1;
2116       *mul = wi::sext (p0 + p1, precision);
2117       return true;
2118
2119     case INTEGER_CST:
2120       if (TREE_CODE (bot) != INTEGER_CST)
2121         return false;
2122
2123       p0 = widest_int::from (top, SIGNED);
2124       p1 = widest_int::from (bot, SIGNED);
2125       if (p1 == 0)
2126         return false;
2127       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2128       return res == 0;
2129
2130     default:
2131       return false;
2132     }
2133 }
2134
2135 /* Return true if memory reference REF with step STEP may be unaligned.  */
2136
2137 static bool
2138 may_be_unaligned_p (tree ref, tree step)
2139 {
2140   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2141      thus they are not misaligned.  */
2142   if (TREE_CODE (ref) == TARGET_MEM_REF)
2143     return false;
2144
2145   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2146   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2147     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2148
2149   unsigned HOST_WIDE_INT bitpos;
2150   unsigned int ref_align;
2151   get_object_alignment_1 (ref, &ref_align, &bitpos);
2152   if (ref_align < align
2153       || (bitpos % align) != 0
2154       || (bitpos % BITS_PER_UNIT) != 0)
2155     return true;
2156
2157   unsigned int trailing_zeros = tree_ctz (step);
2158   if (trailing_zeros < HOST_BITS_PER_INT
2159       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2160     return true;
2161
2162   return false;
2163 }
2164
2165 /* Return true if EXPR may be non-addressable.   */
2166
2167 bool
2168 may_be_nonaddressable_p (tree expr)
2169 {
2170   switch (TREE_CODE (expr))
2171     {
2172     case TARGET_MEM_REF:
2173       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2174          target, thus they are always addressable.  */
2175       return false;
2176
2177     case MEM_REF:
2178       /* Likewise for MEM_REFs, modulo the storage order.  */
2179       return REF_REVERSE_STORAGE_ORDER (expr);
2180
2181     case BIT_FIELD_REF:
2182       if (REF_REVERSE_STORAGE_ORDER (expr))
2183         return true;
2184       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2185
2186     case COMPONENT_REF:
2187       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2188         return true;
2189       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2190              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2191
2192     case ARRAY_REF:
2193     case ARRAY_RANGE_REF:
2194       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2195         return true;
2196       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2197
2198     case VIEW_CONVERT_EXPR:
2199       /* This kind of view-conversions may wrap non-addressable objects
2200          and make them look addressable.  After some processing the
2201          non-addressability may be uncovered again, causing ADDR_EXPRs
2202          of inappropriate objects to be built.  */
2203       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2204           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2205         return true;
2206       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2207
2208     CASE_CONVERT:
2209       return true;
2210
2211     default:
2212       break;
2213     }
2214
2215   return false;
2216 }
2217
2218 /* Finds addresses in *OP_P inside STMT.  */
2219
2220 static void
2221 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2222                                tree *op_p)
2223 {
2224   tree base = *op_p, step = size_zero_node;
2225   struct iv *civ;
2226   struct ifs_ivopts_data ifs_ivopts_data;
2227
2228   /* Do not play with volatile memory references.  A bit too conservative,
2229      perhaps, but safe.  */
2230   if (gimple_has_volatile_ops (stmt))
2231     goto fail;
2232
2233   /* Ignore bitfields for now.  Not really something terribly complicated
2234      to handle.  TODO.  */
2235   if (TREE_CODE (base) == BIT_FIELD_REF)
2236     goto fail;
2237
2238   base = unshare_expr (base);
2239
2240   if (TREE_CODE (base) == TARGET_MEM_REF)
2241     {
2242       tree type = build_pointer_type (TREE_TYPE (base));
2243       tree astep;
2244
2245       if (TMR_BASE (base)
2246           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2247         {
2248           civ = get_iv (data, TMR_BASE (base));
2249           if (!civ)
2250             goto fail;
2251
2252           TMR_BASE (base) = civ->base;
2253           step = civ->step;
2254         }
2255       if (TMR_INDEX2 (base)
2256           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2257         {
2258           civ = get_iv (data, TMR_INDEX2 (base));
2259           if (!civ)
2260             goto fail;
2261
2262           TMR_INDEX2 (base) = civ->base;
2263           step = civ->step;
2264         }
2265       if (TMR_INDEX (base)
2266           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2267         {
2268           civ = get_iv (data, TMR_INDEX (base));
2269           if (!civ)
2270             goto fail;
2271
2272           TMR_INDEX (base) = civ->base;
2273           astep = civ->step;
2274
2275           if (astep)
2276             {
2277               if (TMR_STEP (base))
2278                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2279
2280               step = fold_build2 (PLUS_EXPR, type, step, astep);
2281             }
2282         }
2283
2284       if (integer_zerop (step))
2285         goto fail;
2286       base = tree_mem_ref_addr (type, base);
2287     }
2288   else
2289     {
2290       ifs_ivopts_data.ivopts_data = data;
2291       ifs_ivopts_data.stmt = stmt;
2292       ifs_ivopts_data.step = size_zero_node;
2293       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2294           || integer_zerop (ifs_ivopts_data.step))
2295         goto fail;
2296       step = ifs_ivopts_data.step;
2297
2298       /* Check that the base expression is addressable.  This needs
2299          to be done after substituting bases of IVs into it.  */
2300       if (may_be_nonaddressable_p (base))
2301         goto fail;
2302
2303       /* Moreover, on strict alignment platforms, check that it is
2304          sufficiently aligned.  */
2305       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2306         goto fail;
2307
2308       base = build_fold_addr_expr (base);
2309
2310       /* Substituting bases of IVs into the base expression might
2311          have caused folding opportunities.  */
2312       if (TREE_CODE (base) == ADDR_EXPR)
2313         {
2314           tree *ref = &TREE_OPERAND (base, 0);
2315           while (handled_component_p (*ref))
2316             ref = &TREE_OPERAND (*ref, 0);
2317           if (TREE_CODE (*ref) == MEM_REF)
2318             {
2319               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2320                                       TREE_OPERAND (*ref, 0),
2321                                       TREE_OPERAND (*ref, 1));
2322               if (tem)
2323                 *ref = tem;
2324             }
2325         }
2326     }
2327
2328   civ = alloc_iv (data, base, step);
2329   /* Fail if base object of this memory reference is unknown.  */
2330   if (civ->base_object == NULL_TREE)
2331     goto fail;
2332
2333   record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2334   return;
2335
2336 fail:
2337   for_each_index (op_p, idx_record_use, data);
2338 }
2339
2340 /* Finds and records invariants used in STMT.  */
2341
2342 static void
2343 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2344 {
2345   ssa_op_iter iter;
2346   use_operand_p use_p;
2347   tree op;
2348
2349   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2350     {
2351       op = USE_FROM_PTR (use_p);
2352       record_invariant (data, op, false);
2353     }
2354 }
2355
2356 /* Finds interesting uses of induction variables in the statement STMT.  */
2357
2358 static void
2359 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2360 {
2361   struct iv *iv;
2362   tree op, *lhs, *rhs;
2363   ssa_op_iter iter;
2364   use_operand_p use_p;
2365   enum tree_code code;
2366
2367   find_invariants_stmt (data, stmt);
2368
2369   if (gimple_code (stmt) == GIMPLE_COND)
2370     {
2371       find_interesting_uses_cond (data, stmt);
2372       return;
2373     }
2374
2375   if (is_gimple_assign (stmt))
2376     {
2377       lhs = gimple_assign_lhs_ptr (stmt);
2378       rhs = gimple_assign_rhs1_ptr (stmt);
2379
2380       if (TREE_CODE (*lhs) == SSA_NAME)
2381         {
2382           /* If the statement defines an induction variable, the uses are not
2383              interesting by themselves.  */
2384
2385           iv = get_iv (data, *lhs);
2386
2387           if (iv && !integer_zerop (iv->step))
2388             return;
2389         }
2390
2391       code = gimple_assign_rhs_code (stmt);
2392       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2393           && (REFERENCE_CLASS_P (*rhs)
2394               || is_gimple_val (*rhs)))
2395         {
2396           if (REFERENCE_CLASS_P (*rhs))
2397             find_interesting_uses_address (data, stmt, rhs);
2398           else
2399             find_interesting_uses_op (data, *rhs);
2400
2401           if (REFERENCE_CLASS_P (*lhs))
2402             find_interesting_uses_address (data, stmt, lhs);
2403           return;
2404         }
2405       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2406         {
2407           find_interesting_uses_cond (data, stmt);
2408           return;
2409         }
2410
2411       /* TODO -- we should also handle address uses of type
2412
2413          memory = call (whatever);
2414
2415          and
2416
2417          call (memory).  */
2418     }
2419
2420   if (gimple_code (stmt) == GIMPLE_PHI
2421       && gimple_bb (stmt) == data->current_loop->header)
2422     {
2423       iv = get_iv (data, PHI_RESULT (stmt));
2424
2425       if (iv && !integer_zerop (iv->step))
2426         return;
2427     }
2428
2429   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2430     {
2431       op = USE_FROM_PTR (use_p);
2432
2433       if (TREE_CODE (op) != SSA_NAME)
2434         continue;
2435
2436       iv = get_iv (data, op);
2437       if (!iv)
2438         continue;
2439
2440       find_interesting_uses_op (data, op);
2441     }
2442 }
2443
2444 /* Finds interesting uses of induction variables outside of loops
2445    on loop exit edge EXIT.  */
2446
2447 static void
2448 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2449 {
2450   gphi *phi;
2451   gphi_iterator psi;
2452   tree def;
2453
2454   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2455     {
2456       phi = psi.phi ();
2457       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2458       if (!virtual_operand_p (def))
2459         find_interesting_uses_op (data, def);
2460     }
2461 }
2462
2463 /* Compute maximum offset of [base + offset] addressing mode
2464    for memory reference represented by USE.  */
2465
2466 static HOST_WIDE_INT
2467 compute_max_addr_offset (struct iv_use *use)
2468 {
2469   int width;
2470   rtx reg, addr;
2471   HOST_WIDE_INT i, off;
2472   unsigned list_index, num;
2473   addr_space_t as;
2474   machine_mode mem_mode, addr_mode;
2475   static vec<HOST_WIDE_INT> max_offset_list;
2476
2477   as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2478   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2479
2480   num = max_offset_list.length ();
2481   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2482   if (list_index >= num)
2483     {
2484       max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2485       for (; num < max_offset_list.length (); num++)
2486         max_offset_list[num] = -1;
2487     }
2488
2489   off = max_offset_list[list_index];
2490   if (off != -1)
2491     return off;
2492
2493   addr_mode = targetm.addr_space.address_mode (as);
2494   reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2495   addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2496
2497   width = GET_MODE_BITSIZE (addr_mode) - 1;
2498   if (width > (HOST_BITS_PER_WIDE_INT - 1))
2499     width = HOST_BITS_PER_WIDE_INT - 1;
2500
2501   for (i = width; i > 0; i--)
2502     {
2503       off = (HOST_WIDE_INT_1U << i) - 1;
2504       XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2505       if (memory_address_addr_space_p (mem_mode, addr, as))
2506         break;
2507
2508       /* For some strict-alignment targets, the offset must be naturally
2509          aligned.  Try an aligned offset if mem_mode is not QImode.  */
2510       off = (HOST_WIDE_INT_1U << i);
2511       if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2512         {
2513           off -= GET_MODE_SIZE (mem_mode);
2514           XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2515           if (memory_address_addr_space_p (mem_mode, addr, as))
2516             break;
2517         }
2518     }
2519   if (i == 0)
2520     off = 0;
2521
2522   max_offset_list[list_index] = off;
2523   return off;
2524 }
2525
2526 /* Comparison function to sort group in ascending order of addr_offset.  */
2527
2528 static int
2529 group_compare_offset (const void *a, const void *b)
2530 {
2531   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2532   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2533
2534   if ((*u1)->addr_offset != (*u2)->addr_offset)
2535     return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2536   else
2537     return 0;
2538 }
2539
2540 /* Check if small groups should be split.  Return true if no group
2541    contains more than two uses with distinct addr_offsets.  Return
2542    false otherwise.  We want to split such groups because:
2543
2544      1) Small groups don't have much benefit and may interfer with
2545         general candidate selection.
2546      2) Size for problem with only small groups is usually small and
2547         general algorithm can handle it well.
2548
2549    TODO -- Above claim may not hold when we want to merge memory
2550    accesses with conseuctive addresses.  */
2551
2552 static bool
2553 split_small_address_groups_p (struct ivopts_data *data)
2554 {
2555   unsigned int i, j, distinct = 1;
2556   struct iv_use *pre;
2557   struct iv_group *group;
2558
2559   for (i = 0; i < data->vgroups.length (); i++)
2560     {
2561       group = data->vgroups[i];
2562       if (group->vuses.length () == 1)
2563         continue;
2564
2565       gcc_assert (group->type == USE_ADDRESS);
2566       if (group->vuses.length () == 2)
2567         {
2568           if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2569             std::swap (group->vuses[0], group->vuses[1]);
2570         }
2571       else
2572         group->vuses.qsort (group_compare_offset);
2573
2574       if (distinct > 2)
2575         continue;
2576
2577       distinct = 1;
2578       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2579         {
2580           if (group->vuses[j]->addr_offset != pre->addr_offset)
2581             {
2582               pre = group->vuses[j];
2583               distinct++;
2584             }
2585
2586           if (distinct > 2)
2587             break;
2588         }
2589     }
2590
2591   return (distinct <= 2);
2592 }
2593
2594 /* For each group of address type uses, this function further groups
2595    these uses according to the maximum offset supported by target's
2596    [base + offset] addressing mode.  */
2597
2598 static void
2599 split_address_groups (struct ivopts_data *data)
2600 {
2601   unsigned int i, j;
2602   HOST_WIDE_INT max_offset = -1;
2603
2604   /* Reset max offset to split all small groups.  */
2605   if (split_small_address_groups_p (data))
2606     max_offset = 0;
2607
2608   for (i = 0; i < data->vgroups.length (); i++)
2609     {
2610       struct iv_group *group = data->vgroups[i];
2611       struct iv_use *use = group->vuses[0];
2612
2613       use->id = 0;
2614       use->group_id = group->id;
2615       if (group->vuses.length () == 1)
2616         continue;
2617
2618       if (max_offset != 0)
2619         max_offset = compute_max_addr_offset (use);
2620
2621       for (j = 1; j < group->vuses.length (); j++)
2622         {
2623           struct iv_use *next = group->vuses[j];
2624
2625           /* Only uses with offset that can fit in offset part against
2626              the first use can be grouped together.  */
2627           if (next->addr_offset - use->addr_offset
2628               > (unsigned HOST_WIDE_INT) max_offset)
2629             break;
2630
2631           next->id = j;
2632           next->group_id = group->id;
2633         }
2634       /* Split group.  */
2635       if (j < group->vuses.length ())
2636         {
2637           struct iv_group *new_group = record_group (data, group->type);
2638           new_group->vuses.safe_splice (group->vuses);
2639           new_group->vuses.block_remove (0, j);
2640           group->vuses.truncate (j);
2641         }
2642     }
2643 }
2644
2645 /* Finds uses of the induction variables that are interesting.  */
2646
2647 static void
2648 find_interesting_uses (struct ivopts_data *data)
2649 {
2650   basic_block bb;
2651   gimple_stmt_iterator bsi;
2652   basic_block *body = get_loop_body (data->current_loop);
2653   unsigned i;
2654   edge e;
2655
2656   for (i = 0; i < data->current_loop->num_nodes; i++)
2657     {
2658       edge_iterator ei;
2659       bb = body[i];
2660
2661       FOR_EACH_EDGE (e, ei, bb->succs)
2662         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2663             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2664           find_interesting_uses_outside (data, e);
2665
2666       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2667         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2668       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2669         if (!is_gimple_debug (gsi_stmt (bsi)))
2670           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2671     }
2672
2673   split_address_groups (data);
2674
2675   if (dump_file && (dump_flags & TDF_DETAILS))
2676     {
2677       bitmap_iterator bi;
2678
2679       fprintf (dump_file, "\n<Invariant Vars>:\n");
2680       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2681         {
2682           struct version_info *info = ver_info (data, i);
2683           if (info->inv_id)
2684             {
2685               fprintf (dump_file, "Inv %d:\t", info->inv_id);
2686               print_generic_expr (dump_file, info->name, TDF_SLIM);
2687               fprintf (dump_file, "%s\n",
2688                        info->has_nonlin_use ? "" : "\t(eliminable)");
2689             }
2690         }
2691
2692       fprintf (dump_file, "\n<IV Groups>:\n");
2693       dump_groups (dump_file, data);
2694       fprintf (dump_file, "\n");
2695     }
2696
2697   free (body);
2698 }
2699
2700 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2701    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2702    we are at the top-level of the processed address.  */
2703
2704 static tree
2705 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2706                 HOST_WIDE_INT *offset)
2707 {
2708   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2709   enum tree_code code;
2710   tree type, orig_type = TREE_TYPE (expr);
2711   HOST_WIDE_INT off0, off1, st;
2712   tree orig_expr = expr;
2713
2714   STRIP_NOPS (expr);
2715
2716   type = TREE_TYPE (expr);
2717   code = TREE_CODE (expr);
2718   *offset = 0;
2719
2720   switch (code)
2721     {
2722     case INTEGER_CST:
2723       if (!cst_and_fits_in_hwi (expr)
2724           || integer_zerop (expr))
2725         return orig_expr;
2726
2727       *offset = int_cst_value (expr);
2728       return build_int_cst (orig_type, 0);
2729
2730     case POINTER_PLUS_EXPR:
2731     case PLUS_EXPR:
2732     case MINUS_EXPR:
2733       op0 = TREE_OPERAND (expr, 0);
2734       op1 = TREE_OPERAND (expr, 1);
2735
2736       op0 = strip_offset_1 (op0, false, false, &off0);
2737       op1 = strip_offset_1 (op1, false, false, &off1);
2738
2739       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2740       if (op0 == TREE_OPERAND (expr, 0)
2741           && op1 == TREE_OPERAND (expr, 1))
2742         return orig_expr;
2743
2744       if (integer_zerop (op1))
2745         expr = op0;
2746       else if (integer_zerop (op0))
2747         {
2748           if (code == MINUS_EXPR)
2749             expr = fold_build1 (NEGATE_EXPR, type, op1);
2750           else
2751             expr = op1;
2752         }
2753       else
2754         expr = fold_build2 (code, type, op0, op1);
2755
2756       return fold_convert (orig_type, expr);
2757
2758     case MULT_EXPR:
2759       op1 = TREE_OPERAND (expr, 1);
2760       if (!cst_and_fits_in_hwi (op1))
2761         return orig_expr;
2762
2763       op0 = TREE_OPERAND (expr, 0);
2764       op0 = strip_offset_1 (op0, false, false, &off0);
2765       if (op0 == TREE_OPERAND (expr, 0))
2766         return orig_expr;
2767
2768       *offset = off0 * int_cst_value (op1);
2769       if (integer_zerop (op0))
2770         expr = op0;
2771       else
2772         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2773
2774       return fold_convert (orig_type, expr);
2775
2776     case ARRAY_REF:
2777     case ARRAY_RANGE_REF:
2778       if (!inside_addr)
2779         return orig_expr;
2780
2781       step = array_ref_element_size (expr);
2782       if (!cst_and_fits_in_hwi (step))
2783         break;
2784
2785       st = int_cst_value (step);
2786       op1 = TREE_OPERAND (expr, 1);
2787       op1 = strip_offset_1 (op1, false, false, &off1);
2788       *offset = off1 * st;
2789
2790       if (top_compref
2791           && integer_zerop (op1))
2792         {
2793           /* Strip the component reference completely.  */
2794           op0 = TREE_OPERAND (expr, 0);
2795           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2796           *offset += off0;
2797           return op0;
2798         }
2799       break;
2800
2801     case COMPONENT_REF:
2802       {
2803         tree field;
2804
2805         if (!inside_addr)
2806           return orig_expr;
2807
2808         tmp = component_ref_field_offset (expr);
2809         field = TREE_OPERAND (expr, 1);
2810         if (top_compref
2811             && cst_and_fits_in_hwi (tmp)
2812             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2813           {
2814             HOST_WIDE_INT boffset, abs_off;
2815
2816             /* Strip the component reference completely.  */
2817             op0 = TREE_OPERAND (expr, 0);
2818             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2819             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2820             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2821             if (boffset < 0)
2822               abs_off = -abs_off;
2823
2824             *offset = off0 + int_cst_value (tmp) + abs_off;
2825             return op0;
2826           }
2827       }
2828       break;
2829
2830     case ADDR_EXPR:
2831       op0 = TREE_OPERAND (expr, 0);
2832       op0 = strip_offset_1 (op0, true, true, &off0);
2833       *offset += off0;
2834
2835       if (op0 == TREE_OPERAND (expr, 0))
2836         return orig_expr;
2837
2838       expr = build_fold_addr_expr (op0);
2839       return fold_convert (orig_type, expr);
2840
2841     case MEM_REF:
2842       /* ???  Offset operand?  */
2843       inside_addr = false;
2844       break;
2845
2846     default:
2847       return orig_expr;
2848     }
2849
2850   /* Default handling of expressions for that we want to recurse into
2851      the first operand.  */
2852   op0 = TREE_OPERAND (expr, 0);
2853   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2854   *offset += off0;
2855
2856   if (op0 == TREE_OPERAND (expr, 0)
2857       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2858     return orig_expr;
2859
2860   expr = copy_node (expr);
2861   TREE_OPERAND (expr, 0) = op0;
2862   if (op1)
2863     TREE_OPERAND (expr, 1) = op1;
2864
2865   /* Inside address, we might strip the top level component references,
2866      thus changing type of the expression.  Handling of ADDR_EXPR
2867      will fix that.  */
2868   expr = fold_convert (orig_type, expr);
2869
2870   return expr;
2871 }
2872
2873 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2874
2875 static tree
2876 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2877 {
2878   HOST_WIDE_INT off;
2879   tree core = strip_offset_1 (expr, false, false, &off);
2880   *offset = off;
2881   return core;
2882 }
2883
2884 /* Returns variant of TYPE that can be used as base for different uses.
2885    We return unsigned type with the same precision, which avoids problems
2886    with overflows.  */
2887
2888 static tree
2889 generic_type_for (tree type)
2890 {
2891   if (POINTER_TYPE_P (type))
2892     return unsigned_type_for (type);
2893
2894   if (TYPE_UNSIGNED (type))
2895     return type;
2896
2897   return unsigned_type_for (type);
2898 }
2899
2900 /* Private data for walk_tree.  */
2901
2902 struct walk_tree_data
2903 {
2904   bitmap *inv_vars;
2905   struct ivopts_data *idata;
2906 };
2907
2908 /* Callback function for walk_tree, it records invariants and symbol
2909    reference in *EXPR_P.  DATA is the structure storing result info.  */
2910
2911 static tree
2912 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2913 {
2914   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2915   struct version_info *info;
2916
2917   if (TREE_CODE (*expr_p) != SSA_NAME)
2918     return NULL_TREE;
2919
2920   info = name_info (wdata->idata, *expr_p);
2921   if (!info->inv_id || info->has_nonlin_use)
2922     return NULL_TREE;
2923
2924   if (!*wdata->inv_vars)
2925     *wdata->inv_vars = BITMAP_ALLOC (NULL);
2926   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
2927
2928   return NULL_TREE;
2929 }
2930
2931 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap to that we should
2932    store it.  */
2933
2934 static inline void
2935 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
2936 {
2937   struct walk_tree_data wdata;
2938
2939   if (!inv_vars)
2940     return;
2941
2942   wdata.idata = data;
2943   wdata.inv_vars = inv_vars;
2944   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
2945 }
2946
2947 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2948    position to POS.  If USE is not NULL, the candidate is set as related to
2949    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2950    replacement of the final value of the iv by a direct computation.  */
2951
2952 static struct iv_cand *
2953 add_candidate_1 (struct ivopts_data *data,
2954                  tree base, tree step, bool important, enum iv_position pos,
2955                  struct iv_use *use, gimple *incremented_at,
2956                  struct iv *orig_iv = NULL)
2957 {
2958   unsigned i;
2959   struct iv_cand *cand = NULL;
2960   tree type, orig_type;
2961
2962   gcc_assert (base && step);
2963
2964   /* -fkeep-gc-roots-live means that we have to keep a real pointer
2965      live, but the ivopts code may replace a real pointer with one
2966      pointing before or after the memory block that is then adjusted
2967      into the memory block during the loop.  FIXME: It would likely be
2968      better to actually force the pointer live and still use ivopts;
2969      for example, it would be enough to write the pointer into memory
2970      and keep it there until after the loop.  */
2971   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2972     return NULL;
2973
2974   /* For non-original variables, make sure their values are computed in a type
2975      that does not invoke undefined behavior on overflows (since in general,
2976      we cannot prove that these induction variables are non-wrapping).  */
2977   if (pos != IP_ORIGINAL)
2978     {
2979       orig_type = TREE_TYPE (base);
2980       type = generic_type_for (orig_type);
2981       if (type != orig_type)
2982         {
2983           base = fold_convert (type, base);
2984           step = fold_convert (type, step);
2985         }
2986     }
2987
2988   for (i = 0; i < data->vcands.length (); i++)
2989     {
2990       cand = data->vcands[i];
2991
2992       if (cand->pos != pos)
2993         continue;
2994
2995       if (cand->incremented_at != incremented_at
2996           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2997               && cand->ainc_use != use))
2998         continue;
2999
3000       if (operand_equal_p (base, cand->iv->base, 0)
3001           && operand_equal_p (step, cand->iv->step, 0)
3002           && (TYPE_PRECISION (TREE_TYPE (base))
3003               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3004         break;
3005     }
3006
3007   if (i == data->vcands.length ())
3008     {
3009       cand = XCNEW (struct iv_cand);
3010       cand->id = i;
3011       cand->iv = alloc_iv (data, base, step);
3012       cand->pos = pos;
3013       if (pos != IP_ORIGINAL)
3014         {
3015           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3016           cand->var_after = cand->var_before;
3017         }
3018       cand->important = important;
3019       cand->incremented_at = incremented_at;
3020       data->vcands.safe_push (cand);
3021
3022       if (TREE_CODE (step) != INTEGER_CST)
3023         find_inv_vars (data, &step, &cand->inv_vars);
3024
3025       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3026         cand->ainc_use = use;
3027       else
3028         cand->ainc_use = NULL;
3029
3030       cand->orig_iv = orig_iv;
3031       if (dump_file && (dump_flags & TDF_DETAILS))
3032         dump_cand (dump_file, cand);
3033     }
3034
3035   cand->important |= important;
3036
3037   /* Relate candidate to the group for which it is added.  */
3038   if (use)
3039     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3040
3041   return cand;
3042 }
3043
3044 /* Returns true if incrementing the induction variable at the end of the LOOP
3045    is allowed.
3046
3047    The purpose is to avoid splitting latch edge with a biv increment, thus
3048    creating a jump, possibly confusing other optimization passes and leaving
3049    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
3050    is not available (so we do not have a better alternative), or if the latch
3051    edge is already nonempty.  */
3052
3053 static bool
3054 allow_ip_end_pos_p (struct loop *loop)
3055 {
3056   if (!ip_normal_pos (loop))
3057     return true;
3058
3059   if (!empty_block_p (ip_end_pos (loop)))
3060     return true;
3061
3062   return false;
3063 }
3064
3065 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3066    Important field is set to IMPORTANT.  */
3067
3068 static void
3069 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3070                         bool important, struct iv_use *use)
3071 {
3072   basic_block use_bb = gimple_bb (use->stmt);
3073   machine_mode mem_mode;
3074   unsigned HOST_WIDE_INT cstepi;
3075
3076   /* If we insert the increment in any position other than the standard
3077      ones, we must ensure that it is incremented once per iteration.
3078      It must not be in an inner nested loop, or one side of an if
3079      statement.  */
3080   if (use_bb->loop_father != data->current_loop
3081       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3082       || stmt_could_throw_p (use->stmt)
3083       || !cst_and_fits_in_hwi (step))
3084     return;
3085
3086   cstepi = int_cst_value (step);
3087
3088   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
3089   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3090         || USE_STORE_PRE_INCREMENT (mem_mode))
3091        && GET_MODE_SIZE (mem_mode) == cstepi)
3092       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3093            || USE_STORE_PRE_DECREMENT (mem_mode))
3094           && GET_MODE_SIZE (mem_mode) == -cstepi))
3095     {
3096       enum tree_code code = MINUS_EXPR;
3097       tree new_base;
3098       tree new_step = step;
3099
3100       if (POINTER_TYPE_P (TREE_TYPE (base)))
3101         {
3102           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3103           code = POINTER_PLUS_EXPR;
3104         }
3105       else
3106         new_step = fold_convert (TREE_TYPE (base), new_step);
3107       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3108       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3109                        use->stmt);
3110     }
3111   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3112         || USE_STORE_POST_INCREMENT (mem_mode))
3113        && GET_MODE_SIZE (mem_mode) == cstepi)
3114       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3115            || USE_STORE_POST_DECREMENT (mem_mode))
3116           && GET_MODE_SIZE (mem_mode) == -cstepi))
3117     {
3118       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3119                        use->stmt);
3120     }
3121 }
3122
3123 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3124    position to POS.  If USE is not NULL, the candidate is set as related to
3125    it.  The candidate computation is scheduled before exit condition and at
3126    the end of loop.  */
3127
3128 static void
3129 add_candidate (struct ivopts_data *data,
3130                tree base, tree step, bool important, struct iv_use *use,
3131                struct iv *orig_iv = NULL)
3132 {
3133   if (ip_normal_pos (data->current_loop))
3134     add_candidate_1 (data, base, step, important,
3135                      IP_NORMAL, use, NULL, orig_iv);
3136   if (ip_end_pos (data->current_loop)
3137       && allow_ip_end_pos_p (data->current_loop))
3138     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3139 }
3140
3141 /* Adds standard iv candidates.  */
3142
3143 static void
3144 add_standard_iv_candidates (struct ivopts_data *data)
3145 {
3146   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3147
3148   /* The same for a double-integer type if it is still fast enough.  */
3149   if (TYPE_PRECISION
3150         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3151       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3152     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3153                    build_int_cst (long_integer_type_node, 1), true, NULL);
3154
3155   /* The same for a double-integer type if it is still fast enough.  */
3156   if (TYPE_PRECISION
3157         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3158       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3159     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3160                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
3161 }
3162
3163
3164 /* Adds candidates bases on the old induction variable IV.  */
3165
3166 static void
3167 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3168 {
3169   gimple *phi;
3170   tree def;
3171   struct iv_cand *cand;
3172
3173   /* Check if this biv is used in address type use.  */
3174   if (iv->no_overflow  && iv->have_address_use
3175       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3176       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3177     {
3178       tree base = fold_convert (sizetype, iv->base);
3179       tree step = fold_convert (sizetype, iv->step);
3180
3181       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3182       add_candidate (data, base, step, true, NULL, iv);
3183       /* Add iv cand of the original type only if it has nonlinear use.  */
3184       if (iv->nonlin_use)
3185         add_candidate (data, iv->base, iv->step, true, NULL);
3186     }
3187   else
3188     add_candidate (data, iv->base, iv->step, true, NULL);
3189
3190   /* The same, but with initial value zero.  */
3191   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3192     add_candidate (data, size_int (0), iv->step, true, NULL);
3193   else
3194     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3195                    iv->step, true, NULL);
3196
3197   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3198   if (gimple_code (phi) == GIMPLE_PHI)
3199     {
3200       /* Additionally record the possibility of leaving the original iv
3201          untouched.  */
3202       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3203       /* Don't add candidate if it's from another PHI node because
3204          it's an affine iv appearing in the form of PEELED_CHREC.  */
3205       phi = SSA_NAME_DEF_STMT (def);
3206       if (gimple_code (phi) != GIMPLE_PHI)
3207         {
3208           cand = add_candidate_1 (data,
3209                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
3210                                   SSA_NAME_DEF_STMT (def));
3211           if (cand)
3212             {
3213               cand->var_before = iv->ssa_name;
3214               cand->var_after = def;
3215             }
3216         }
3217       else
3218         gcc_assert (gimple_bb (phi) == data->current_loop->header);
3219     }
3220 }
3221
3222 /* Adds candidates based on the old induction variables.  */
3223
3224 static void
3225 add_iv_candidate_for_bivs (struct ivopts_data *data)
3226 {
3227   unsigned i;
3228   struct iv *iv;
3229   bitmap_iterator bi;
3230
3231   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3232     {
3233       iv = ver_info (data, i)->iv;
3234       if (iv && iv->biv_p && !integer_zerop (iv->step))
3235         add_iv_candidate_for_biv (data, iv);
3236     }
3237 }
3238
3239 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3240
3241 static void
3242 record_common_cand (struct ivopts_data *data, tree base,
3243                     tree step, struct iv_use *use)
3244 {
3245   struct iv_common_cand ent;
3246   struct iv_common_cand **slot;
3247
3248   ent.base = base;
3249   ent.step = step;
3250   ent.hash = iterative_hash_expr (base, 0);
3251   ent.hash = iterative_hash_expr (step, ent.hash);
3252
3253   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3254   if (*slot == NULL)
3255     {
3256       *slot = new iv_common_cand ();
3257       (*slot)->base = base;
3258       (*slot)->step = step;
3259       (*slot)->uses.create (8);
3260       (*slot)->hash = ent.hash;
3261       data->iv_common_cands.safe_push ((*slot));
3262     }
3263
3264   gcc_assert (use != NULL);
3265   (*slot)->uses.safe_push (use);
3266   return;
3267 }
3268
3269 /* Comparison function used to sort common candidates.  */
3270
3271 static int
3272 common_cand_cmp (const void *p1, const void *p2)
3273 {
3274   unsigned n1, n2;
3275   const struct iv_common_cand *const *const ccand1
3276     = (const struct iv_common_cand *const *)p1;
3277   const struct iv_common_cand *const *const ccand2
3278     = (const struct iv_common_cand *const *)p2;
3279
3280   n1 = (*ccand1)->uses.length ();
3281   n2 = (*ccand2)->uses.length ();
3282   return n2 - n1;
3283 }
3284
3285 /* Adds IV candidates based on common candidated recorded.  */
3286
3287 static void
3288 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3289 {
3290   unsigned i, j;
3291   struct iv_cand *cand_1, *cand_2;
3292
3293   data->iv_common_cands.qsort (common_cand_cmp);
3294   for (i = 0; i < data->iv_common_cands.length (); i++)
3295     {
3296       struct iv_common_cand *ptr = data->iv_common_cands[i];
3297
3298       /* Only add IV candidate if it's derived from multiple uses.  */
3299       if (ptr->uses.length () <= 1)
3300         break;
3301
3302       cand_1 = NULL;
3303       cand_2 = NULL;
3304       if (ip_normal_pos (data->current_loop))
3305         cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3306                                   false, IP_NORMAL, NULL, NULL);
3307
3308       if (ip_end_pos (data->current_loop)
3309           && allow_ip_end_pos_p (data->current_loop))
3310         cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3311                                   false, IP_END, NULL, NULL);
3312
3313       /* Bind deriving uses and the new candidates.  */
3314       for (j = 0; j < ptr->uses.length (); j++)
3315         {
3316           struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3317           if (cand_1)
3318             bitmap_set_bit (group->related_cands, cand_1->id);
3319           if (cand_2)
3320             bitmap_set_bit (group->related_cands, cand_2->id);
3321         }
3322     }
3323
3324   /* Release data since it is useless from this point.  */
3325   data->iv_common_cand_tab->empty ();
3326   data->iv_common_cands.truncate (0);
3327 }
3328
3329 /* Adds candidates based on the value of USE's iv.  */
3330
3331 static void
3332 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3333 {
3334   unsigned HOST_WIDE_INT offset;
3335   tree base;
3336   tree basetype;
3337   struct iv *iv = use->iv;
3338
3339   add_candidate (data, iv->base, iv->step, false, use);
3340
3341   /* Record common candidate for use in case it can be shared by others.  */
3342   record_common_cand (data, iv->base, iv->step, use);
3343
3344   /* Record common candidate with initial value zero.  */
3345   basetype = TREE_TYPE (iv->base);
3346   if (POINTER_TYPE_P (basetype))
3347     basetype = sizetype;
3348   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3349
3350   /* Record common candidate with constant offset stripped in base.
3351      Like the use itself, we also add candidate directly for it.  */
3352   base = strip_offset (iv->base, &offset);
3353   if (offset || base != iv->base)
3354     {
3355       record_common_cand (data, base, iv->step, use);
3356       add_candidate (data, base, iv->step, false, use);
3357     }
3358
3359   /* Record common candidate with base_object removed in base.  */
3360   base = iv->base;
3361   STRIP_NOPS (base);
3362   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3363     {
3364       tree step = iv->step;
3365
3366       STRIP_NOPS (step);
3367       base = TREE_OPERAND (base, 1);
3368       step = fold_convert (sizetype, step);
3369       record_common_cand (data, base, step, use);
3370       /* Also record common candidate with offset stripped.  */
3371       base = strip_offset (base, &offset);
3372       if (offset)
3373         record_common_cand (data, base, step, use);
3374     }
3375
3376   /* At last, add auto-incremental candidates.  Make such variables
3377      important since other iv uses with same base object may be based
3378      on it.  */
3379   if (use != NULL && use->type == USE_ADDRESS)
3380     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3381 }
3382
3383 /* Adds candidates based on the uses.  */
3384
3385 static void
3386 add_iv_candidate_for_groups (struct ivopts_data *data)
3387 {
3388   unsigned i;
3389
3390   /* Only add candidate for the first use in group.  */
3391   for (i = 0; i < data->vgroups.length (); i++)
3392     {
3393       struct iv_group *group = data->vgroups[i];
3394
3395       gcc_assert (group->vuses[0] != NULL);
3396       add_iv_candidate_for_use (data, group->vuses[0]);
3397     }
3398   add_iv_candidate_derived_from_uses (data);
3399 }
3400
3401 /* Record important candidates and add them to related_cands bitmaps.  */
3402
3403 static void
3404 record_important_candidates (struct ivopts_data *data)
3405 {
3406   unsigned i;
3407   struct iv_group *group;
3408
3409   for (i = 0; i < data->vcands.length (); i++)
3410     {
3411       struct iv_cand *cand = data->vcands[i];
3412
3413       if (cand->important)
3414         bitmap_set_bit (data->important_candidates, i);
3415     }
3416
3417   data->consider_all_candidates = (data->vcands.length ()
3418                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
3419
3420   /* Add important candidates to groups' related_cands bitmaps.  */
3421   for (i = 0; i < data->vgroups.length (); i++)
3422     {
3423       group = data->vgroups[i];
3424       bitmap_ior_into (group->related_cands, data->important_candidates);
3425     }
3426 }
3427
3428 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3429    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3430    we allocate a simple list to every use.  */
3431
3432 static void
3433 alloc_use_cost_map (struct ivopts_data *data)
3434 {
3435   unsigned i, size, s;
3436
3437   for (i = 0; i < data->vgroups.length (); i++)
3438     {
3439       struct iv_group *group = data->vgroups[i];
3440
3441       if (data->consider_all_candidates)
3442         size = data->vcands.length ();
3443       else
3444         {
3445           s = bitmap_count_bits (group->related_cands);
3446
3447           /* Round up to the power of two, so that moduling by it is fast.  */
3448           size = s ? (1 << ceil_log2 (s)) : 1;
3449         }
3450
3451       group->n_map_members = size;
3452       group->cost_map = XCNEWVEC (struct cost_pair, size);
3453     }
3454 }
3455
3456 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3457    on invariants INV_VARS and that the value used in expressing it is
3458    VALUE, and in case of iv elimination the comparison operator is COMP.  */
3459
3460 static void
3461 set_group_iv_cost (struct ivopts_data *data,
3462                    struct iv_group *group, struct iv_cand *cand,
3463                    comp_cost cost, bitmap inv_vars, tree value,
3464                    enum tree_code comp, bitmap inv_exprs)
3465 {
3466   unsigned i, s;
3467
3468   if (cost.infinite_cost_p ())
3469     {
3470       BITMAP_FREE (inv_vars);
3471       BITMAP_FREE (inv_exprs);
3472       return;
3473     }
3474
3475   if (data->consider_all_candidates)
3476     {
3477       group->cost_map[cand->id].cand = cand;
3478       group->cost_map[cand->id].cost = cost;
3479       group->cost_map[cand->id].inv_vars = inv_vars;
3480       group->cost_map[cand->id].inv_exprs = inv_exprs;
3481       group->cost_map[cand->id].value = value;
3482       group->cost_map[cand->id].comp = comp;
3483       return;
3484     }
3485
3486   /* n_map_members is a power of two, so this computes modulo.  */
3487   s = cand->id & (group->n_map_members - 1);
3488   for (i = s; i < group->n_map_members; i++)
3489     if (!group->cost_map[i].cand)
3490       goto found;
3491   for (i = 0; i < s; i++)
3492     if (!group->cost_map[i].cand)
3493       goto found;
3494
3495   gcc_unreachable ();
3496
3497 found:
3498   group->cost_map[i].cand = cand;
3499   group->cost_map[i].cost = cost;
3500   group->cost_map[i].inv_vars = inv_vars;
3501   group->cost_map[i].inv_exprs = inv_exprs;
3502   group->cost_map[i].value = value;
3503   group->cost_map[i].comp = comp;
3504 }
3505
3506 /* Gets cost of (GROUP, CAND) pair.  */
3507
3508 static struct cost_pair *
3509 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3510                    struct iv_cand *cand)
3511 {
3512   unsigned i, s;
3513   struct cost_pair *ret;
3514
3515   if (!cand)
3516     return NULL;
3517
3518   if (data->consider_all_candidates)
3519     {
3520       ret = group->cost_map + cand->id;
3521       if (!ret->cand)
3522         return NULL;
3523
3524       return ret;
3525     }
3526
3527   /* n_map_members is a power of two, so this computes modulo.  */
3528   s = cand->id & (group->n_map_members - 1);
3529   for (i = s; i < group->n_map_members; i++)
3530     if (group->cost_map[i].cand == cand)
3531       return group->cost_map + i;
3532     else if (group->cost_map[i].cand == NULL)
3533       return NULL;
3534   for (i = 0; i < s; i++)
3535     if (group->cost_map[i].cand == cand)
3536       return group->cost_map + i;
3537     else if (group->cost_map[i].cand == NULL)
3538       return NULL;
3539
3540   return NULL;
3541 }
3542
3543 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3544 static rtx
3545 produce_memory_decl_rtl (tree obj, int *regno)
3546 {
3547   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3548   machine_mode address_mode = targetm.addr_space.address_mode (as);
3549   rtx x;
3550
3551   gcc_assert (obj);
3552   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3553     {
3554       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3555       x = gen_rtx_SYMBOL_REF (address_mode, name);
3556       SET_SYMBOL_REF_DECL (x, obj);
3557       x = gen_rtx_MEM (DECL_MODE (obj), x);
3558       set_mem_addr_space (x, as);
3559       targetm.encode_section_info (obj, x, true);
3560     }
3561   else
3562     {
3563       x = gen_raw_REG (address_mode, (*regno)++);
3564       x = gen_rtx_MEM (DECL_MODE (obj), x);
3565       set_mem_addr_space (x, as);
3566     }
3567
3568   return x;
3569 }
3570
3571 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3572    walk_tree.  DATA contains the actual fake register number.  */
3573
3574 static tree
3575 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3576 {
3577   tree obj = NULL_TREE;
3578   rtx x = NULL_RTX;
3579   int *regno = (int *) data;
3580
3581   switch (TREE_CODE (*expr_p))
3582     {
3583     case ADDR_EXPR:
3584       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3585            handled_component_p (*expr_p);
3586            expr_p = &TREE_OPERAND (*expr_p, 0))
3587         continue;
3588       obj = *expr_p;
3589       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3590         x = produce_memory_decl_rtl (obj, regno);
3591       break;
3592
3593     case SSA_NAME:
3594       *ws = 0;
3595       obj = SSA_NAME_VAR (*expr_p);
3596       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3597       if (!obj)
3598         return NULL_TREE;
3599       if (!DECL_RTL_SET_P (obj))
3600         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3601       break;
3602
3603     case VAR_DECL:
3604     case PARM_DECL:
3605     case RESULT_DECL:
3606       *ws = 0;
3607       obj = *expr_p;
3608
3609       if (DECL_RTL_SET_P (obj))
3610         break;
3611
3612       if (DECL_MODE (obj) == BLKmode)
3613         x = produce_memory_decl_rtl (obj, regno);
3614       else
3615         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3616
3617       break;
3618
3619     default:
3620       break;
3621     }
3622
3623   if (x)
3624     {
3625       decl_rtl_to_reset.safe_push (obj);
3626       SET_DECL_RTL (obj, x);
3627     }
3628
3629   return NULL_TREE;
3630 }
3631
3632 /* Determines cost of the computation of EXPR.  */
3633
3634 static unsigned
3635 computation_cost (tree expr, bool speed)
3636 {
3637   rtx_insn *seq;
3638   rtx rslt;
3639   tree type = TREE_TYPE (expr);
3640   unsigned cost;
3641   /* Avoid using hard regs in ways which may be unsupported.  */
3642   int regno = LAST_VIRTUAL_REGISTER + 1;
3643   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3644   enum node_frequency real_frequency = node->frequency;
3645
3646   node->frequency = NODE_FREQUENCY_NORMAL;
3647   crtl->maybe_hot_insn_p = speed;
3648   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3649   start_sequence ();
3650   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3651   seq = get_insns ();
3652   end_sequence ();
3653   default_rtl_profile ();
3654   node->frequency = real_frequency;
3655
3656   cost = seq_cost (seq, speed);
3657   if (MEM_P (rslt))
3658     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3659                           TYPE_ADDR_SPACE (type), speed);
3660   else if (!REG_P (rslt))
3661     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3662
3663   return cost;
3664 }
3665
3666 /* Returns variable containing the value of candidate CAND at statement AT.  */
3667
3668 static tree
3669 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3670 {
3671   if (stmt_after_increment (loop, cand, stmt))
3672     return cand->var_after;
3673   else
3674     return cand->var_before;
3675 }
3676
3677 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3678    same precision that is at least as wide as the precision of TYPE, stores
3679    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3680    type of A and B.  */
3681
3682 static tree
3683 determine_common_wider_type (tree *a, tree *b)
3684 {
3685   tree wider_type = NULL;
3686   tree suba, subb;
3687   tree atype = TREE_TYPE (*a);
3688
3689   if (CONVERT_EXPR_P (*a))
3690     {
3691       suba = TREE_OPERAND (*a, 0);
3692       wider_type = TREE_TYPE (suba);
3693       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3694         return atype;
3695     }
3696   else
3697     return atype;
3698
3699   if (CONVERT_EXPR_P (*b))
3700     {
3701       subb = TREE_OPERAND (*b, 0);
3702       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3703         return atype;
3704     }
3705   else
3706     return atype;
3707
3708   *a = suba;
3709   *b = subb;
3710   return wider_type;
3711 }
3712
3713 /* Determines the expression by that USE is expressed from induction variable
3714    CAND at statement AT in LOOP.  The expression is stored in two parts in a
3715    decomposed form.  The invariant part is stored in AFF_INV; while variant
3716    part in AFF_VAR.  Store ratio of CAND.step over USE.step in PRAT if it's
3717    non-null.  Returns false if USE cannot be expressed using CAND.  */
3718
3719 static bool
3720 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3721                        struct iv_cand *cand, struct aff_tree *aff_inv,
3722                        struct aff_tree *aff_var, widest_int *prat = NULL)
3723 {
3724   tree ubase = use->iv->base, ustep = use->iv->step;
3725   tree cbase = cand->iv->base, cstep = cand->iv->step;
3726   tree common_type, uutype, var, cstep_common;
3727   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3728   aff_tree aff_cbase;
3729   widest_int rat;
3730
3731   /* We must have a precision to express the values of use.  */
3732   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3733     return false;
3734
3735   var = var_at_stmt (loop, cand, at);
3736   uutype = unsigned_type_for (utype);
3737
3738   /* If the conversion is not noop, perform it.  */
3739   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3740     {
3741       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3742           && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3743         {
3744           tree inner_base, inner_step, inner_type;
3745           inner_base = TREE_OPERAND (cbase, 0);
3746           if (CONVERT_EXPR_P (cstep))
3747             inner_step = TREE_OPERAND (cstep, 0);
3748           else
3749             inner_step = cstep;
3750
3751           inner_type = TREE_TYPE (inner_base);
3752           /* If candidate is added from a biv whose type is smaller than
3753              ctype, we know both candidate and the biv won't overflow.
3754              In this case, it's safe to skip the convertion in candidate.
3755              As an example, (unsigned short)((unsigned long)A) equals to
3756              (unsigned short)A, if A has a type no larger than short.  */
3757           if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3758             {
3759               cbase = inner_base;
3760               cstep = inner_step;
3761             }
3762         }
3763       cbase = fold_convert (uutype, cbase);
3764       cstep = fold_convert (uutype, cstep);
3765       var = fold_convert (uutype, var);
3766     }
3767
3768   /* Ratio is 1 when computing the value of biv cand by itself.
3769      We can't rely on constant_multiple_of in this case because the
3770      use is created after the original biv is selected.  The call
3771      could fail because of inconsistent fold behavior.  See PR68021
3772      for more information.  */
3773   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3774     {
3775       gcc_assert (is_gimple_assign (use->stmt));
3776       gcc_assert (use->iv->ssa_name == cand->var_after);
3777       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3778       rat = 1;
3779     }
3780   else if (!constant_multiple_of (ustep, cstep, &rat))
3781     return false;
3782
3783   if (prat)
3784     *prat = rat;
3785
3786   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3787      type, we achieve better folding by computing their difference in this
3788      wider type, and cast the result to UUTYPE.  We do not need to worry about
3789      overflows, as all the arithmetics will in the end be performed in UUTYPE
3790      anyway.  */
3791   common_type = determine_common_wider_type (&ubase, &cbase);
3792
3793   /* use = ubase - ratio * cbase + ratio * var.  */
3794   tree_to_aff_combination (ubase, common_type, aff_inv);
3795   tree_to_aff_combination (cbase, common_type, &aff_cbase);
3796   tree_to_aff_combination (var, uutype, aff_var);
3797
3798   /* We need to shift the value if we are after the increment.  */
3799   if (stmt_after_increment (loop, cand, at))
3800     {
3801       aff_tree cstep_aff;
3802
3803       if (common_type != uutype)
3804         cstep_common = fold_convert (common_type, cstep);
3805       else
3806         cstep_common = cstep;
3807
3808       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3809       aff_combination_add (&aff_cbase, &cstep_aff);
3810     }
3811
3812   aff_combination_scale (&aff_cbase, -rat);
3813   aff_combination_add (aff_inv, &aff_cbase);
3814   if (common_type != uutype)
3815     aff_combination_convert (aff_inv, uutype);
3816
3817   aff_combination_scale (aff_var, rat);
3818   return true;
3819 }
3820
3821 /* Determines the expression by that USE is expressed from induction variable
3822    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3823    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3824
3825 static bool
3826 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3827                      struct iv_cand *cand, struct aff_tree *aff)
3828 {
3829   aff_tree aff_var;
3830
3831   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3832     return false;
3833
3834   aff_combination_add (aff, &aff_var);
3835   return true;
3836 }
3837
3838 /* Return the type of USE.  */
3839
3840 static tree
3841 get_use_type (struct iv_use *use)
3842 {
3843   tree base_type = TREE_TYPE (use->iv->base);
3844   tree type;
3845
3846   if (use->type == USE_ADDRESS)
3847     {
3848       /* The base_type may be a void pointer.  Create a pointer type based on
3849          the mem_ref instead.  */
3850       type = build_pointer_type (TREE_TYPE (*use->op_p));
3851       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3852                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3853     }
3854   else
3855     type = base_type;
3856
3857   return type;
3858 }
3859
3860 /* Determines the expression by that USE is expressed from induction variable
3861    CAND at statement AT in LOOP.  The computation is unshared.  */
3862
3863 static tree
3864 get_computation_at (struct loop *loop, gimple *at,
3865                     struct iv_use *use, struct iv_cand *cand)
3866 {
3867   aff_tree aff;
3868   tree type = get_use_type (use);
3869
3870   if (!get_computation_aff (loop, at, use, cand, &aff))
3871     return NULL_TREE;
3872   unshare_aff_combination (&aff);
3873   return fold_convert (type, aff_combination_to_tree (&aff));
3874 }
3875
3876 /* Adjust the cost COST for being in loop setup rather than loop body.
3877    If we're optimizing for space, the loop setup overhead is constant;
3878    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3879 static unsigned
3880 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3881 {
3882   if (cost == INFTY)
3883     return cost;
3884   else if (optimize_loop_for_speed_p (data->current_loop))
3885     return cost / avg_loop_niter (data->current_loop);
3886   else
3887     return cost;
3888 }
3889
3890 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3891    validity for a memory reference accessing memory of mode MODE in
3892    address space AS.  */
3893
3894
3895 bool
3896 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3897                                  addr_space_t as)
3898 {
3899 #define MAX_RATIO 128
3900   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3901   static vec<sbitmap> valid_mult_list;
3902   sbitmap valid_mult;
3903
3904   if (data_index >= valid_mult_list.length ())
3905     valid_mult_list.safe_grow_cleared (data_index + 1);
3906
3907   valid_mult = valid_mult_list[data_index];
3908   if (!valid_mult)
3909     {
3910       machine_mode address_mode = targetm.addr_space.address_mode (as);
3911       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3912       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3913       rtx addr, scaled;
3914       HOST_WIDE_INT i;
3915
3916       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3917       bitmap_clear (valid_mult);
3918       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3919       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3920       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3921         {
3922           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3923           if (memory_address_addr_space_p (mode, addr, as)
3924               || memory_address_addr_space_p (mode, scaled, as))
3925             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3926         }
3927
3928       if (dump_file && (dump_flags & TDF_DETAILS))
3929         {
3930           fprintf (dump_file, "  allowed multipliers:");
3931           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3932             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3933               fprintf (dump_file, " %d", (int) i);
3934           fprintf (dump_file, "\n");
3935           fprintf (dump_file, "\n");
3936         }
3937
3938       valid_mult_list[data_index] = valid_mult;
3939     }
3940
3941   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3942     return false;
3943
3944   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3945 }
3946
3947 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3948    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3949    variable is omitted.  Compute the cost for a memory reference that accesses
3950    a memory location of mode MEM_MODE in address space AS.
3951
3952    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3953    size of MEM_MODE / RATIO) is available.  To make this determination, we
3954    look at the size of the increment to be made, which is given in CSTEP.
3955    CSTEP may be zero if the step is unknown.
3956    STMT_AFTER_INC is true iff the statement we're looking at is after the
3957    increment of the original biv.
3958
3959    TODO -- there must be some better way.  This all is quite crude.  */
3960
3961 enum ainc_type
3962 {
3963   AINC_PRE_INC,         /* Pre increment.  */
3964   AINC_PRE_DEC,         /* Pre decrement.  */
3965   AINC_POST_INC,        /* Post increment.  */
3966   AINC_POST_DEC,        /* Post decrement.  */
3967   AINC_NONE             /* Also the number of auto increment types.  */
3968 };
3969
3970 struct address_cost_data
3971 {
3972   HOST_WIDE_INT min_offset, max_offset;
3973   unsigned costs[2][2][2][2];
3974   unsigned ainc_costs[AINC_NONE];
3975 };
3976
3977
3978 static comp_cost
3979 get_address_cost (bool symbol_present, bool var_present,
3980                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3981                   HOST_WIDE_INT cstep, machine_mode mem_mode,
3982                   addr_space_t as, bool speed,
3983                   bool stmt_after_inc, bool *may_autoinc)
3984 {
3985   machine_mode address_mode = targetm.addr_space.address_mode (as);
3986   static vec<address_cost_data *> address_cost_data_list;
3987   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3988   address_cost_data *data;
3989   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3990   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3991   unsigned cost, acost, complexity;
3992   enum ainc_type autoinc_type;
3993   bool offset_p, ratio_p, autoinc;
3994   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3995   unsigned HOST_WIDE_INT mask;
3996   unsigned bits;
3997
3998   if (data_index >= address_cost_data_list.length ())
3999     address_cost_data_list.safe_grow_cleared (data_index + 1);
4000
4001   data = address_cost_data_list[data_index];
4002   if (!data)
4003     {
4004       HOST_WIDE_INT i;
4005       HOST_WIDE_INT rat, off = 0;
4006       int old_cse_not_expected, width;
4007       unsigned sym_p, var_p, off_p, rat_p, add_c;
4008       rtx_insn *seq;
4009       rtx addr, base;
4010       rtx reg0, reg1;
4011
4012       data = (address_cost_data *) xcalloc (1, sizeof (*data));
4013
4014       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4015
4016       width = GET_MODE_BITSIZE (address_mode) - 1;
4017       if (width > (HOST_BITS_PER_WIDE_INT - 1))
4018         width = HOST_BITS_PER_WIDE_INT - 1;
4019       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
4020
4021       for (i = width; i >= 0; i--)
4022         {
4023           off = -(HOST_WIDE_INT_1U << i);
4024           XEXP (addr, 1) = gen_int_mode (off, address_mode);
4025           if (memory_address_addr_space_p (mem_mode, addr, as))
4026             break;
4027         }
4028       data->min_offset = (i == -1? 0 : off);
4029
4030       for (i = width; i >= 0; i--)
4031         {
4032           off = (HOST_WIDE_INT_1U << i) - 1;
4033           XEXP (addr, 1) = gen_int_mode (off, address_mode);
4034           if (memory_address_addr_space_p (mem_mode, addr, as))
4035             break;
4036           /* For some strict-alignment targets, the offset must be naturally
4037              aligned.  Try an aligned offset if mem_mode is not QImode.  */
4038           off = mem_mode != QImode
4039                 ? (HOST_WIDE_INT_1U << i)
4040                     - GET_MODE_SIZE (mem_mode)
4041                 : 0;
4042           if (off > 0)
4043             {
4044               XEXP (addr, 1) = gen_int_mode (off, address_mode);
4045               if (memory_address_addr_space_p (mem_mode, addr, as))
4046                 break;
4047             }
4048         }
4049       if (i == -1)
4050         off = 0;
4051       data->max_offset = off;
4052
4053       if (dump_file && (dump_flags & TDF_DETAILS))
4054         {
4055           fprintf (dump_file, "get_address_cost:\n");
4056           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4057                    GET_MODE_NAME (mem_mode),
4058                    data->min_offset);
4059           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4060                    GET_MODE_NAME (mem_mode),
4061                    data->max_offset);
4062         }
4063
4064       rat = 1;
4065       for (i = 2; i <= MAX_RATIO; i++)
4066         if (multiplier_allowed_in_address_p (i, mem_mode, as))
4067           {
4068             rat = i;
4069             break;
4070           }
4071
4072       /* Compute the cost of various addressing modes.  */
4073       acost = 0;
4074       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4075       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4076
4077       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4078           || USE_STORE_PRE_DECREMENT (mem_mode))
4079         {
4080           addr = gen_rtx_PRE_DEC (address_mode, reg0);
4081           has_predec[mem_mode]
4082             = memory_address_addr_space_p (mem_mode, addr, as);
4083
4084           if (has_predec[mem_mode])
4085             data->ainc_costs[AINC_PRE_DEC]
4086               = address_cost (addr, mem_mode, as, speed);
4087         }
4088       if (USE_LOAD_POST_DECREMENT (mem_mode)
4089           || USE_STORE_POST_DECREMENT (mem_mode))
4090         {
4091           addr = gen_rtx_POST_DEC (address_mode, reg0);
4092           has_postdec[mem_mode]
4093             = memory_address_addr_space_p (mem_mode, addr, as);
4094
4095           if (has_postdec[mem_mode])
4096             data->ainc_costs[AINC_POST_DEC]
4097               = address_cost (addr, mem_mode, as, speed);
4098         }
4099       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4100           || USE_STORE_PRE_DECREMENT (mem_mode))
4101         {
4102           addr = gen_rtx_PRE_INC (address_mode, reg0);
4103           has_preinc[mem_mode]
4104             = memory_address_addr_space_p (mem_mode, addr, as);
4105
4106           if (has_preinc[mem_mode])
4107             data->ainc_costs[AINC_PRE_INC]
4108               = address_cost (addr, mem_mode, as, speed);
4109         }
4110       if (USE_LOAD_POST_INCREMENT (mem_mode)
4111           || USE_STORE_POST_INCREMENT (mem_mode))
4112         {
4113           addr = gen_rtx_POST_INC (address_mode, reg0);
4114           has_postinc[mem_mode]
4115             = memory_address_addr_space_p (mem_mode, addr, as);
4116
4117           if (has_postinc[mem_mode])
4118             data->ainc_costs[AINC_POST_INC]
4119               = address_cost (addr, mem_mode, as, speed);
4120         }
4121       for (i = 0; i < 16; i++)
4122         {
4123           sym_p = i & 1;
4124           var_p = (i >> 1) & 1;
4125           off_p = (i >> 2) & 1;
4126           rat_p = (i >> 3) & 1;
4127
4128           addr = reg0;
4129           if (rat_p)
4130             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4131                                    gen_int_mode (rat, address_mode));
4132
4133           if (var_p)
4134             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4135
4136           if (sym_p)
4137             {
4138               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4139               /* ??? We can run into trouble with some backends by presenting
4140                  it with symbols which haven't been properly passed through
4141                  targetm.encode_section_info.  By setting the local bit, we
4142                  enhance the probability of things working.  */
4143               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4144
4145               if (off_p)
4146                 base = gen_rtx_fmt_e (CONST, address_mode,
4147                                       gen_rtx_fmt_ee
4148                                         (PLUS, address_mode, base,
4149                                          gen_int_mode (off, address_mode)));
4150             }
4151           else if (off_p)
4152             base = gen_int_mode (off, address_mode);
4153           else
4154             base = NULL_RTX;
4155
4156           if (base)
4157             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4158
4159           start_sequence ();
4160           /* To avoid splitting addressing modes, pretend that no cse will
4161              follow.  */
4162           old_cse_not_expected = cse_not_expected;
4163           cse_not_expected = true;
4164           addr = memory_address_addr_space (mem_mode, addr, as);
4165           cse_not_expected = old_cse_not_expected;
4166           seq = get_insns ();
4167           end_sequence ();
4168
4169           acost = seq_cost (seq, speed);
4170           acost += address_cost (addr, mem_mode, as, speed);
4171
4172           if (!acost)
4173             acost = 1;
4174           data->costs[sym_p][var_p][off_p][rat_p] = acost;
4175         }
4176
4177       /* On some targets, it is quite expensive to load symbol to a register,
4178          which makes addresses that contain symbols look much more expensive.
4179          However, the symbol will have to be loaded in any case before the
4180          loop (and quite likely we have it in register already), so it does not
4181          make much sense to penalize them too heavily.  So make some final
4182          tweaks for the SYMBOL_PRESENT modes:
4183
4184          If VAR_PRESENT is false, and the mode obtained by changing symbol to
4185          var is cheaper, use this mode with small penalty.
4186          If VAR_PRESENT is true, try whether the mode with
4187          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4188          if this is the case, use it.  */
4189       add_c = add_cost (speed, address_mode);
4190       for (i = 0; i < 8; i++)
4191         {
4192           var_p = i & 1;
4193           off_p = (i >> 1) & 1;
4194           rat_p = (i >> 2) & 1;
4195
4196           acost = data->costs[0][1][off_p][rat_p] + 1;
4197           if (var_p)
4198             acost += add_c;
4199
4200           if (acost < data->costs[1][var_p][off_p][rat_p])
4201             data->costs[1][var_p][off_p][rat_p] = acost;
4202         }
4203
4204       if (dump_file && (dump_flags & TDF_DETAILS))
4205         {
4206           fprintf (dump_file, "<Address Costs>:\n");
4207
4208           for (i = 0; i < 16; i++)
4209             {
4210               sym_p = i & 1;
4211               var_p = (i >> 1) & 1;
4212               off_p = (i >> 2) & 1;
4213               rat_p = (i >> 3) & 1;
4214
4215               fprintf (dump_file, "  ");
4216               if (sym_p)
4217                 fprintf (dump_file, "sym + ");
4218               if (var_p)
4219                 fprintf (dump_file, "var + ");
4220               if (off_p)
4221                 fprintf (dump_file, "cst + ");
4222               if (rat_p)
4223                 fprintf (dump_file, "rat * ");
4224
4225               acost = data->costs[sym_p][var_p][off_p][rat_p];
4226               fprintf (dump_file, "index costs %d\n", acost);
4227             }
4228           if (has_predec[mem_mode] || has_postdec[mem_mode]
4229               || has_preinc[mem_mode] || has_postinc[mem_mode])
4230             fprintf (dump_file, "  May include autoinc/dec\n");
4231           fprintf (dump_file, "\n");
4232         }
4233
4234       address_cost_data_list[data_index] = data;
4235     }
4236
4237   bits = GET_MODE_BITSIZE (address_mode);
4238   mask = ~(HOST_WIDE_INT_M1U << (bits - 1) << 1);
4239   offset &= mask;
4240   if ((offset >> (bits - 1) & 1))
4241     offset |= ~mask;
4242   s_offset = offset;
4243
4244   autoinc = false;
4245   autoinc_type = AINC_NONE;
4246   msize = GET_MODE_SIZE (mem_mode);
4247   autoinc_offset = offset;
4248   if (stmt_after_inc)
4249     autoinc_offset += ratio * cstep;
4250   if (symbol_present || var_present || ratio != 1)
4251     autoinc = false;
4252   else
4253     {
4254       if (has_postinc[mem_mode] && autoinc_offset == 0
4255           && msize == cstep)
4256         autoinc_type = AINC_POST_INC;
4257       else if (has_postdec[mem_mode] && autoinc_offset == 0
4258                && msize == -cstep)
4259         autoinc_type = AINC_POST_DEC;
4260       else if (has_preinc[mem_mode] && autoinc_offset == msize
4261                && msize == cstep)
4262         autoinc_type = AINC_PRE_INC;
4263       else if (has_predec[mem_mode] && autoinc_offset == -msize
4264                && msize == -cstep)
4265         autoinc_type = AINC_PRE_DEC;
4266
4267       if (autoinc_type != AINC_NONE)
4268         autoinc = true;
4269     }
4270
4271   cost = 0;
4272   offset_p = (s_offset != 0
4273               && data->min_offset <= s_offset
4274               && s_offset <= data->max_offset);
4275   ratio_p = (ratio != 1
4276              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4277
4278   if (ratio != 1 && !ratio_p)
4279     cost += mult_by_coeff_cost (ratio, address_mode, speed);
4280
4281   if (s_offset && !offset_p && !symbol_present)
4282     cost += add_cost (speed, address_mode);
4283
4284   if (may_autoinc)
4285     *may_autoinc = autoinc;
4286   if (autoinc)
4287     acost = data->ainc_costs[autoinc_type];
4288   else
4289     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4290   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4291   return comp_cost (cost + acost, complexity);
4292 }
4293
4294  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4295     EXPR operand holding the shift.  COST0 and COST1 are the costs for
4296     calculating the operands of EXPR.  Returns true if successful, and returns
4297     the cost in COST.  */
4298
4299 static bool
4300 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4301                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4302 {
4303   comp_cost res;
4304   tree op1 = TREE_OPERAND (expr, 1);
4305   tree cst = TREE_OPERAND (mult, 1);
4306   tree multop = TREE_OPERAND (mult, 0);
4307   int m = exact_log2 (int_cst_value (cst));
4308   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4309   int as_cost, sa_cost;
4310   bool mult_in_op1;
4311
4312   if (!(m >= 0 && m < maxm))
4313     return false;
4314
4315   STRIP_NOPS (op1);
4316   mult_in_op1 = operand_equal_p (op1, mult, 0);
4317
4318   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4319
4320   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4321      use that in preference to a shift insn followed by an add insn.  */
4322   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4323              ? shiftadd_cost (speed, mode, m)
4324              : (mult_in_op1
4325                 ? shiftsub1_cost (speed, mode, m)
4326                 : shiftsub0_cost (speed, mode, m)));
4327
4328   res = comp_cost (MIN (as_cost, sa_cost), 0);
4329   res += (mult_in_op1 ? cost0 : cost1);
4330
4331   STRIP_NOPS (multop);
4332   if (!is_gimple_val (multop))
4333     res += force_expr_to_var_cost (multop, speed);
4334
4335   *cost = res;
4336   return true;
4337 }
4338
4339 /* Estimates cost of forcing expression EXPR into a variable.  */
4340
4341 static comp_cost
4342 force_expr_to_var_cost (tree expr, bool speed)
4343 {
4344   static bool costs_initialized = false;
4345   static unsigned integer_cost [2];
4346   static unsigned symbol_cost [2];
4347   static unsigned address_cost [2];
4348   tree op0, op1;
4349   comp_cost cost0, cost1, cost;
4350   machine_mode mode;
4351
4352   if (!costs_initialized)
4353     {
4354       tree type = build_pointer_type (integer_type_node);
4355       tree var, addr;
4356       rtx x;
4357       int i;
4358
4359       var = create_tmp_var_raw (integer_type_node, "test_var");
4360       TREE_STATIC (var) = 1;
4361       x = produce_memory_decl_rtl (var, NULL);
4362       SET_DECL_RTL (var, x);
4363
4364       addr = build1 (ADDR_EXPR, type, var);
4365
4366
4367       for (i = 0; i < 2; i++)
4368         {
4369           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4370                                                              2000), i);
4371
4372           symbol_cost[i] = computation_cost (addr, i) + 1;
4373
4374           address_cost[i]
4375             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4376           if (dump_file && (dump_flags & TDF_DETAILS))
4377             {
4378               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4379               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4380               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4381               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4382               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4383               fprintf (dump_file, "\n");
4384             }
4385         }
4386
4387       costs_initialized = true;
4388     }
4389
4390   STRIP_NOPS (expr);
4391
4392   if (SSA_VAR_P (expr))
4393     return no_cost;
4394
4395   if (is_gimple_min_invariant (expr))
4396     {
4397       if (TREE_CODE (expr) == INTEGER_CST)
4398         return comp_cost (integer_cost [speed], 0);
4399
4400       if (TREE_CODE (expr) == ADDR_EXPR)
4401         {
4402           tree obj = TREE_OPERAND (expr, 0);
4403
4404           if (VAR_P (obj)
4405               || TREE_CODE (obj) == PARM_DECL
4406               || TREE_CODE (obj) == RESULT_DECL)
4407             return comp_cost (symbol_cost [speed], 0);
4408         }
4409
4410       return comp_cost (address_cost [speed], 0);
4411     }
4412
4413   switch (TREE_CODE (expr))
4414     {
4415     case POINTER_PLUS_EXPR:
4416     case PLUS_EXPR:
4417     case MINUS_EXPR:
4418     case MULT_EXPR:
4419       op0 = TREE_OPERAND (expr, 0);
4420       op1 = TREE_OPERAND (expr, 1);
4421       STRIP_NOPS (op0);
4422       STRIP_NOPS (op1);
4423       break;
4424
4425     CASE_CONVERT:
4426     case NEGATE_EXPR:
4427       op0 = TREE_OPERAND (expr, 0);
4428       STRIP_NOPS (op0);
4429       op1 = NULL_TREE;
4430       break;
4431
4432     default:
4433       /* Just an arbitrary value, FIXME.  */
4434       return comp_cost (target_spill_cost[speed], 0);
4435     }
4436
4437   if (op0 == NULL_TREE
4438       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4439     cost0 = no_cost;
4440   else
4441     cost0 = force_expr_to_var_cost (op0, speed);
4442
4443   if (op1 == NULL_TREE
4444       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4445     cost1 = no_cost;
4446   else
4447     cost1 = force_expr_to_var_cost (op1, speed);
4448
4449   mode = TYPE_MODE (TREE_TYPE (expr));
4450   switch (TREE_CODE (expr))
4451     {
4452     case POINTER_PLUS_EXPR:
4453     case PLUS_EXPR:
4454     case MINUS_EXPR:
4455     case NEGATE_EXPR:
4456       cost = comp_cost (add_cost (speed, mode), 0);
4457       if (TREE_CODE (expr) != NEGATE_EXPR)
4458         {
4459           tree mult = NULL_TREE;
4460           comp_cost sa_cost;
4461           if (TREE_CODE (op1) == MULT_EXPR)
4462             mult = op1;
4463           else if (TREE_CODE (op0) == MULT_EXPR)
4464             mult = op0;
4465
4466           if (mult != NULL_TREE
4467               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4468               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4469                                     speed, &sa_cost))
4470             return sa_cost;
4471         }
4472       break;
4473
4474     CASE_CONVERT:
4475       {
4476         tree inner_mode, outer_mode;
4477         outer_mode = TREE_TYPE (expr);
4478         inner_mode = TREE_TYPE (op0);
4479         cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4480                                        TYPE_MODE (inner_mode), speed), 0);
4481       }
4482       break;
4483
4484     case MULT_EXPR:
4485       if (cst_and_fits_in_hwi (op0))
4486         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4487                                              mode, speed), 0);
4488       else if (cst_and_fits_in_hwi (op1))
4489         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4490                                              mode, speed), 0);
4491       else
4492         return comp_cost (target_spill_cost [speed], 0);
4493       break;
4494
4495     default:
4496       gcc_unreachable ();
4497     }
4498
4499   cost += cost0;
4500   cost += cost1;
4501
4502   /* Bound the cost by target_spill_cost.  The parts of complicated
4503      computations often are either loop invariant or at least can
4504      be shared between several iv uses, so letting this grow without
4505      limits would not give reasonable results.  */
4506   if (cost.cost > (int) target_spill_cost [speed])
4507     cost.cost = target_spill_cost [speed];
4508
4509   return cost;
4510 }
4511
4512 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4513    invariants the computation depends on.  */
4514
4515 static comp_cost
4516 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4517 {
4518   if (!expr)
4519     return no_cost;
4520
4521   find_inv_vars (data, &expr, inv_vars);
4522   return force_expr_to_var_cost (expr, data->speed);
4523 }
4524
4525 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
4526    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4527    to false if the corresponding part is missing.  inv_vars is a set of the
4528    invariants the computation depends on.  */
4529
4530 static comp_cost
4531 split_address_cost (struct ivopts_data *data,
4532                     tree addr, bool *symbol_present, bool *var_present,
4533                     unsigned HOST_WIDE_INT *offset, bitmap *inv_vars)
4534 {
4535   tree core;
4536   HOST_WIDE_INT bitsize;
4537   HOST_WIDE_INT bitpos;
4538   tree toffset;
4539   machine_mode mode;
4540   int unsignedp, reversep, volatilep;
4541
4542   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4543                               &unsignedp, &reversep, &volatilep);
4544
4545   if (toffset != 0
4546       || bitpos % BITS_PER_UNIT != 0
4547       || reversep
4548       || !VAR_P (core))
4549     {
4550       *symbol_present = false;
4551       *var_present = true;
4552       find_inv_vars (data, &addr, inv_vars);
4553       return comp_cost (target_spill_cost[data->speed], 0);
4554     }
4555
4556   *offset += bitpos / BITS_PER_UNIT;
4557   if (TREE_STATIC (core)
4558       || DECL_EXTERNAL (core))
4559     {
4560       *symbol_present = true;
4561       *var_present = false;
4562       return no_cost;
4563     }
4564
4565   *symbol_present = false;
4566   *var_present = true;
4567   return no_cost;
4568 }
4569
4570 /* Estimates cost of expressing difference of addresses E1 - E2 as
4571    var + symbol + offset.  The value of offset is added to OFFSET,
4572    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4573    part is missing.  inv_vars is a set of the invariants the computation
4574    depends on.  */
4575
4576 static comp_cost
4577 ptr_difference_cost (struct ivopts_data *data,
4578                      tree e1, tree e2, bool *symbol_present, bool *var_present,
4579                      unsigned HOST_WIDE_INT *offset, bitmap *inv_vars)
4580 {
4581   HOST_WIDE_INT diff = 0;
4582   aff_tree aff_e1, aff_e2;
4583   tree type;
4584
4585   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4586
4587   if (ptr_difference_const (e1, e2, &diff))
4588     {
4589       *offset += diff;
4590       *symbol_present = false;
4591       *var_present = false;
4592       return no_cost;
4593     }
4594
4595   if (integer_zerop (e2))
4596     return split_address_cost (data, TREE_OPERAND (e1, 0),
4597                                symbol_present, var_present, offset, inv_vars);
4598
4599   *symbol_present = false;
4600   *var_present = true;
4601
4602   type = signed_type_for (TREE_TYPE (e1));
4603   tree_to_aff_combination (e1, type, &aff_e1);
4604   tree_to_aff_combination (e2, type, &aff_e2);
4605   aff_combination_scale (&aff_e2, -1);
4606   aff_combination_add (&aff_e1, &aff_e2);
4607
4608   return force_var_cost (data, aff_combination_to_tree (&aff_e1), inv_vars);
4609 }
4610
4611 /* Estimates cost of expressing difference E1 - E2 as
4612    var + symbol + offset.  The value of offset is added to OFFSET,
4613    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4614    part is missing.  INV_VARS is a set of the invariants the computation
4615    depends on.  */
4616
4617 static comp_cost
4618 difference_cost (struct ivopts_data *data,
4619                  tree e1, tree e2, bool *symbol_present, bool *var_present,
4620                  unsigned HOST_WIDE_INT *offset, bitmap *inv_vars)
4621 {
4622   machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4623   unsigned HOST_WIDE_INT off1, off2;
4624   aff_tree aff_e1, aff_e2;
4625   tree type;
4626
4627   e1 = strip_offset (e1, &off1);
4628   e2 = strip_offset (e2, &off2);
4629   *offset += off1 - off2;
4630
4631   STRIP_NOPS (e1);
4632   STRIP_NOPS (e2);
4633
4634   if (TREE_CODE (e1) == ADDR_EXPR)
4635     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4636                                 offset, inv_vars);
4637   *symbol_present = false;
4638
4639   if (operand_equal_p (e1, e2, 0))
4640     {
4641       *var_present = false;
4642       return no_cost;
4643     }
4644
4645   *var_present = true;
4646
4647   if (integer_zerop (e2))
4648     return force_var_cost (data, e1, inv_vars);
4649
4650   if (integer_zerop (e1))
4651     {
4652       comp_cost cost = force_var_cost (data, e2, inv_vars);
4653       cost += mult_by_coeff_cost (-1, mode, data->speed);
4654       return cost;
4655     }
4656
4657   type = signed_type_for (TREE_TYPE (e1));
4658   tree_to_aff_combination (e1, type, &aff_e1);
4659   tree_to_aff_combination (e2, type, &aff_e2);
4660   aff_combination_scale (&aff_e2, -1);
4661   aff_combination_add (&aff_e1, &aff_e2);
4662
4663   return force_var_cost (data, aff_combination_to_tree (&aff_e1), inv_vars);
4664 }
4665
4666 /* Returns true if AFF1 and AFF2 are identical.  */
4667
4668 static bool
4669 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4670 {
4671   unsigned i;
4672
4673   if (aff1->n != aff2->n)
4674     return false;
4675
4676   for (i = 0; i < aff1->n; i++)
4677     {
4678       if (aff1->elts[i].coef != aff2->elts[i].coef)
4679         return false;
4680
4681       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4682         return false;
4683     }
4684   return true;
4685 }
4686
4687 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
4688
4689 static iv_inv_expr_ent *
4690 record_inv_expr (struct ivopts_data *data, tree expr)
4691 {
4692   struct iv_inv_expr_ent ent;
4693   struct iv_inv_expr_ent **slot;
4694
4695   ent.expr = expr;
4696   ent.hash = iterative_hash_expr (expr, 0);
4697   slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4698
4699   if (!*slot)
4700     {
4701       *slot = XNEW (struct iv_inv_expr_ent);
4702       (*slot)->expr = expr;
4703       (*slot)->hash = ent.hash;
4704       (*slot)->id = data->max_inv_expr_id++;
4705     }
4706
4707   return *slot;
4708 }
4709
4710 /* Returns the invariant expression if expression UBASE - RATIO * CBASE
4711    requires a new compiler generated temporary.  Returns -1 otherwise.
4712    ADDRESS_P is a flag indicating if the expression is for address
4713    computation.  */
4714
4715 static iv_inv_expr_ent *
4716 get_loop_invariant_expr (struct ivopts_data *data, tree ubase,
4717                          tree cbase, HOST_WIDE_INT ratio,
4718                          bool address_p)
4719 {
4720   aff_tree ubase_aff, cbase_aff;
4721   tree expr, ub, cb;
4722
4723   STRIP_NOPS (ubase);
4724   STRIP_NOPS (cbase);
4725   ub = ubase;
4726   cb = cbase;
4727
4728   if ((TREE_CODE (ubase) == INTEGER_CST)
4729       && (TREE_CODE (cbase) == INTEGER_CST))
4730     return NULL;
4731
4732   /* Strips the constant part. */
4733   if (TREE_CODE (ubase) == PLUS_EXPR
4734       || TREE_CODE (ubase) == MINUS_EXPR
4735       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4736     {
4737       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4738         ubase = TREE_OPERAND (ubase, 0);
4739     }
4740
4741   /* Strips the constant part. */
4742   if (TREE_CODE (cbase) == PLUS_EXPR
4743       || TREE_CODE (cbase) == MINUS_EXPR
4744       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4745     {
4746       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4747         cbase = TREE_OPERAND (cbase, 0);
4748     }
4749
4750   if (address_p)
4751     {
4752       if (((TREE_CODE (ubase) == SSA_NAME)
4753            || (TREE_CODE (ubase) == ADDR_EXPR
4754                && is_gimple_min_invariant (ubase)))
4755           && (TREE_CODE (cbase) == INTEGER_CST))
4756         return NULL;
4757
4758       if (((TREE_CODE (cbase) == SSA_NAME)
4759            || (TREE_CODE (cbase) == ADDR_EXPR
4760                && is_gimple_min_invariant (cbase)))
4761           && (TREE_CODE (ubase) == INTEGER_CST))
4762         return NULL;
4763     }
4764
4765   if (ratio == 1)
4766     {
4767       if (operand_equal_p (ubase, cbase, 0))
4768         return NULL;
4769
4770       if (TREE_CODE (ubase) == ADDR_EXPR
4771           && TREE_CODE (cbase) == ADDR_EXPR)
4772         {
4773           tree usym, csym;
4774
4775           usym = TREE_OPERAND (ubase, 0);
4776           csym = TREE_OPERAND (cbase, 0);
4777           if (TREE_CODE (usym) == ARRAY_REF)
4778             {
4779               tree ind = TREE_OPERAND (usym, 1);
4780               if (TREE_CODE (ind) == INTEGER_CST
4781                   && tree_fits_shwi_p (ind)
4782                   && tree_to_shwi (ind) == 0)
4783                 usym = TREE_OPERAND (usym, 0);
4784             }
4785           if (TREE_CODE (csym) == ARRAY_REF)
4786             {
4787               tree ind = TREE_OPERAND (csym, 1);
4788               if (TREE_CODE (ind) == INTEGER_CST
4789                   && tree_fits_shwi_p (ind)
4790                   && tree_to_shwi (ind) == 0)
4791                 csym = TREE_OPERAND (csym, 0);
4792             }
4793           if (operand_equal_p (usym, csym, 0))
4794             return NULL;
4795         }
4796       /* Now do more complex comparison  */
4797       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4798       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4799       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4800         return NULL;
4801     }
4802
4803   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4804   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4805
4806   aff_combination_scale (&cbase_aff, -1 * ratio);
4807   aff_combination_add (&ubase_aff, &cbase_aff);
4808   expr = aff_combination_to_tree (&ubase_aff);
4809   return record_inv_expr (data, expr);
4810 }
4811
4812 /* Scale (multiply) the computed COST (except scratch part that should be
4813    hoisted out a loop) by header->frequency / AT->frequency, which makes
4814    expected cost more accurate.  */
4815
4816 static comp_cost
4817 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4818 {
4819    int loop_freq = data->current_loop->header->frequency;
4820    int bb_freq = gimple_bb (at)->frequency;
4821    if (loop_freq != 0)
4822      {
4823        gcc_assert (cost.scratch <= cost.cost);
4824        int scaled_cost
4825          = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4826
4827        if (dump_file && (dump_flags & TDF_DETAILS))
4828          fprintf (dump_file, "Scaling cost based on bb prob "
4829                   "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4830                   1.0f * bb_freq / loop_freq, cost.cost,
4831                   cost.scratch, scaled_cost, bb_freq, loop_freq);
4832
4833        cost.cost = scaled_cost;
4834      }
4835
4836   return cost;
4837 }
4838
4839 /* Determines the cost of the computation by that USE is expressed
4840    from induction variable CAND.  If ADDRESS_P is true, we just need
4841    to create an address from it, otherwise we want to get it into
4842    register.  A set of invariants we depend on is stored in INV_VARS.
4843    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4844    addressing is likely.  If INV_EXPR is nonnull, record invariant
4845    expr entry in it.  */
4846
4847 static comp_cost
4848 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4849                       struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4850                       bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4851 {
4852   gimple *at = use->stmt;
4853   tree ubase = use->iv->base, ustep = use->iv->step;
4854   tree cbase, cstep;
4855   tree utype = TREE_TYPE (ubase), ctype;
4856   unsigned HOST_WIDE_INT cstepi, offset = 0;
4857   HOST_WIDE_INT ratio, aratio;
4858   bool var_present, symbol_present, stmt_is_after_inc;
4859   comp_cost cost;
4860   widest_int rat;
4861   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4862   machine_mode mem_mode = (address_p
4863                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4864                                 : VOIDmode);
4865
4866   if (inv_vars)
4867     *inv_vars = NULL;
4868
4869   cbase = cand->iv->base;
4870   cstep = cand->iv->step;
4871   ctype = TREE_TYPE (cbase);
4872
4873   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4874     {
4875       /* We do not have a precision to express the values of use.  */
4876       return infinite_cost;
4877     }
4878
4879   if (address_p
4880       || (use->iv->base_object
4881           && cand->iv->base_object
4882           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4883           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4884     {
4885       /* Do not try to express address of an object with computation based
4886          on address of a different object.  This may cause problems in rtl
4887          level alias analysis (that does not expect this to be happening,
4888          as this is illegal in C), and would be unlikely to be useful
4889          anyway.  */
4890       if (use->iv->base_object
4891           && cand->iv->base_object
4892           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4893         return infinite_cost;
4894     }
4895
4896   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4897     {
4898       /* TODO -- add direct handling of this case.  */
4899       goto fallback;
4900     }
4901
4902   /* CSTEPI is removed from the offset in case statement is after the
4903      increment.  If the step is not constant, we use zero instead.
4904      This is a bit imprecise (there is the extra addition), but
4905      redundancy elimination is likely to transform the code so that
4906      it uses value of the variable before increment anyway,
4907      so it is not that much unrealistic.  */
4908   if (cst_and_fits_in_hwi (cstep))
4909     cstepi = int_cst_value (cstep);
4910   else
4911     cstepi = 0;
4912
4913   if (!constant_multiple_of (ustep, cstep, &rat))
4914     return infinite_cost;
4915
4916   if (wi::fits_shwi_p (rat))
4917     ratio = rat.to_shwi ();
4918   else
4919     return infinite_cost;
4920
4921   STRIP_NOPS (cbase);
4922   ctype = TREE_TYPE (cbase);
4923
4924   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4925
4926   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4927      or ratio == 1, it is better to handle this like
4928
4929      ubase - ratio * cbase + ratio * var
4930
4931      (also holds in the case ratio == -1, TODO.  */
4932
4933   if (cst_and_fits_in_hwi (cbase))
4934     {
4935       offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4936       cost = difference_cost (data,
4937                               ubase, build_int_cst (utype, 0),
4938                               &symbol_present, &var_present, &offset,
4939                               inv_vars);
4940       cost /= avg_loop_niter (data->current_loop);
4941     }
4942   else if (ratio == 1)
4943     {
4944       tree real_cbase = cbase;
4945
4946       /* Check to see if any adjustment is needed.  */
4947       if (cstepi == 0 && stmt_is_after_inc)
4948         {
4949           aff_tree real_cbase_aff;
4950           aff_tree cstep_aff;
4951
4952           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4953                                    &real_cbase_aff);
4954           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4955
4956           aff_combination_add (&real_cbase_aff, &cstep_aff);
4957           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4958         }
4959
4960       cost = difference_cost (data,
4961                               ubase, real_cbase,
4962                               &symbol_present, &var_present, &offset,
4963                               inv_vars);
4964       cost /= avg_loop_niter (data->current_loop);
4965     }
4966   else if (address_p
4967            && !POINTER_TYPE_P (ctype)
4968            && multiplier_allowed_in_address_p
4969                 (ratio, mem_mode,
4970                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4971     {
4972       tree real_cbase = cbase;
4973
4974       if (cstepi == 0 && stmt_is_after_inc)
4975         {
4976           if (POINTER_TYPE_P (ctype))
4977             real_cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4978           else
4979             real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4980         }
4981       real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase,
4982                                 build_int_cst (ctype, ratio));
4983       cost = difference_cost (data,
4984                               ubase, real_cbase,
4985                               &symbol_present, &var_present, &offset,
4986                               inv_vars);
4987       cost /= avg_loop_niter (data->current_loop);
4988     }
4989   else
4990     {
4991       cost = force_var_cost (data, cbase, inv_vars);
4992       cost += difference_cost (data, ubase, build_int_cst (utype, 0),
4993                                &symbol_present, &var_present, &offset,
4994                                inv_vars);
4995       cost /= avg_loop_niter (data->current_loop);
4996       cost += add_cost (data->speed, TYPE_MODE (ctype));
4997     }
4998
4999   /* Record setup cost in scratch field.  */
5000   cost.scratch = cost.cost;
5001
5002   if (inv_expr && inv_vars && *inv_vars)
5003     {
5004       *inv_expr = get_loop_invariant_expr (data, ubase, cbase, ratio,
5005                                            address_p);
5006       /* Clear depends on.  */
5007       if (*inv_expr != NULL)
5008         bitmap_clear (*inv_vars);
5009     }
5010
5011   /* If we are after the increment, the value of the candidate is higher by
5012      one iteration.  */
5013   if (stmt_is_after_inc)
5014     offset -= ratio * cstepi;
5015
5016   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
5017      (symbol/var1/const parts may be omitted).  If we are looking for an
5018      address, find the cost of addressing this.  */
5019   if (address_p)
5020     {
5021       cost += get_address_cost (symbol_present, var_present,
5022                                 offset, ratio, cstepi,
5023                                 mem_mode,
5024                                 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
5025                                 speed, stmt_is_after_inc, can_autoinc);
5026       return get_scaled_computation_cost_at (data, at, cost);
5027     }
5028
5029   /* Otherwise estimate the costs for computing the expression.  */
5030   if (!symbol_present && !var_present && !offset)
5031     {
5032       if (ratio != 1)
5033         cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
5034       return get_scaled_computation_cost_at (data, at, cost);
5035     }
5036
5037   /* Symbol + offset should be compile-time computable so consider that they
5038       are added once to the variable, if present.  */
5039   if (var_present && (symbol_present || offset))
5040     cost += adjust_setup_cost (data,
5041                                     add_cost (speed, TYPE_MODE (ctype)));
5042
5043   /* Having offset does not affect runtime cost in case it is added to
5044      symbol, but it increases complexity.  */
5045   if (offset)
5046     cost.complexity++;
5047
5048   cost += add_cost (speed, TYPE_MODE (ctype));
5049
5050   aratio = ratio > 0 ? ratio : -ratio;
5051   if (aratio != 1)
5052     cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
5053
5054   return get_scaled_computation_cost_at (data, at, cost);
5055
5056 fallback:
5057   if (can_autoinc)
5058     *can_autoinc = false;
5059
5060   /* Just get the expression, expand it and measure the cost.  */
5061   tree comp = get_computation_at (data->current_loop, at, use, cand);
5062
5063   if (!comp)
5064     return infinite_cost;
5065
5066   if (address_p)
5067     comp = build_simple_mem_ref (comp);
5068
5069   cost = comp_cost (computation_cost (comp, speed), 0);
5070
5071   return get_scaled_computation_cost_at (data, at, cost);
5072 }
5073
5074 /* Determines cost of computing the use in GROUP with CAND in a generic
5075    expression.  */
5076
5077 static bool
5078 determine_group_iv_cost_generic (struct ivopts_data *data,
5079                                  struct iv_group *group, struct iv_cand *cand)
5080 {
5081   comp_cost cost;
5082   iv_inv_expr_ent *inv_expr = NULL;
5083   bitmap inv_vars = NULL, inv_exprs = NULL;
5084   struct iv_use *use = group->vuses[0];
5085
5086   /* The simple case first -- if we need to express value of the preserved
5087      original biv, the cost is 0.  This also prevents us from counting the
5088      cost of increment twice -- once at this use and once in the cost of
5089      the candidate.  */
5090   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
5091     cost = no_cost;
5092   else
5093     cost = get_computation_cost (data, use, cand, false,
5094                                  &inv_vars, NULL, &inv_expr);
5095
5096   if (inv_expr)
5097     {
5098       inv_exprs = BITMAP_ALLOC (NULL);
5099       bitmap_set_bit (inv_exprs, inv_expr->id);
5100     }
5101   set_group_iv_cost (data, group, cand, cost, inv_vars,
5102                      NULL_TREE, ERROR_MARK, inv_exprs);
5103   return !cost.infinite_cost_p ();
5104 }
5105
5106 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
5107
5108 static bool
5109 determine_group_iv_cost_address (struct ivopts_data *data,
5110                                  struct iv_group *group, struct iv_cand *cand)
5111 {
5112   unsigned i;
5113   bitmap inv_vars = NULL, inv_exprs = NULL;
5114   bool can_autoinc;
5115   iv_inv_expr_ent *inv_expr = NULL;
5116   struct iv_use *use = group->vuses[0];
5117   comp_cost sum_cost = no_cost, cost;
5118
5119   cost = get_computation_cost (data, use, cand, true,
5120                                &inv_vars, &can_autoinc, &inv_expr);
5121
5122   if (inv_expr)
5123     {
5124       inv_exprs = BITMAP_ALLOC (NULL);
5125       bitmap_set_bit (inv_exprs, inv_expr->id);
5126     }
5127   sum_cost = cost;
5128   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5129     {
5130       if (can_autoinc)
5131         sum_cost -= cand->cost_step;
5132       /* If we generated the candidate solely for exploiting autoincrement
5133          opportunities, and it turns out it can't be used, set the cost to
5134          infinity to make sure we ignore it.  */
5135       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5136         sum_cost = infinite_cost;
5137     }
5138
5139   /* Uses in a group can share setup code, so only add setup cost once.  */
5140   cost -= cost.scratch;
5141   /* Compute and add costs for rest uses of this group.  */
5142   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5143     {
5144       struct iv_use *next = group->vuses[i];
5145
5146       /* TODO: We could skip computing cost for sub iv_use when it has the
5147          same cost as the first iv_use, but the cost really depends on the
5148          offset and where the iv_use is.  */
5149         cost = get_computation_cost (data, next, cand, true,
5150                                      NULL, &can_autoinc, NULL);
5151       sum_cost += cost;
5152     }
5153   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5154                      NULL_TREE, ERROR_MARK, inv_exprs);
5155
5156   return !sum_cost.infinite_cost_p ();
5157 }
5158
5159 /* Computes value of candidate CAND at position AT in iteration NITER, and
5160    stores it to VAL.  */
5161
5162 static void
5163 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5164                aff_tree *val)
5165 {
5166   aff_tree step, delta, nit;
5167   struct iv *iv = cand->iv;
5168   tree type = TREE_TYPE (iv->base);
5169   tree steptype;
5170   if (POINTER_TYPE_P (type))
5171     steptype = sizetype;
5172   else
5173     steptype = unsigned_type_for (type);
5174
5175   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5176   aff_combination_convert (&step, steptype);
5177   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5178   aff_combination_convert (&nit, steptype);
5179   aff_combination_mult (&nit, &step, &delta);
5180   if (stmt_after_increment (loop, cand, at))
5181     aff_combination_add (&delta, &step);
5182
5183   tree_to_aff_combination (iv->base, type, val);
5184   if (!POINTER_TYPE_P (type))
5185     aff_combination_convert (val, steptype);
5186   aff_combination_add (val, &delta);
5187 }
5188
5189 /* Returns period of induction variable iv.  */
5190
5191 static tree
5192 iv_period (struct iv *iv)
5193 {
5194   tree step = iv->step, period, type;
5195   tree pow2div;
5196
5197   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5198
5199   type = unsigned_type_for (TREE_TYPE (step));
5200   /* Period of the iv is lcm (step, type_range)/step -1,
5201      i.e., N*type_range/step - 1. Since type range is power
5202      of two, N == (step >> num_of_ending_zeros_binary (step),
5203      so the final result is
5204
5205        (type_range >> num_of_ending_zeros_binary (step)) - 1
5206
5207   */
5208   pow2div = num_ending_zeros (step);
5209
5210   period = build_low_bits_mask (type,
5211                                 (TYPE_PRECISION (type)
5212                                  - tree_to_uhwi (pow2div)));
5213
5214   return period;
5215 }
5216
5217 /* Returns the comparison operator used when eliminating the iv USE.  */
5218
5219 static enum tree_code
5220 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5221 {
5222   struct loop *loop = data->current_loop;
5223   basic_block ex_bb;
5224   edge exit;
5225
5226   ex_bb = gimple_bb (use->stmt);
5227   exit = EDGE_SUCC (ex_bb, 0);
5228   if (flow_bb_inside_loop_p (loop, exit->dest))
5229     exit = EDGE_SUCC (ex_bb, 1);
5230
5231   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5232 }
5233
5234 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
5235    we only detect the situation that BASE = SOMETHING + OFFSET, where the
5236    calculation is performed in non-wrapping type.
5237
5238    TODO: More generally, we could test for the situation that
5239          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5240          This would require knowing the sign of OFFSET.  */
5241
5242 static bool
5243 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5244 {
5245   enum tree_code code;
5246   tree e1, e2;
5247   aff_tree aff_e1, aff_e2, aff_offset;
5248
5249   if (!nowrap_type_p (TREE_TYPE (base)))
5250     return false;
5251
5252   base = expand_simple_operations (base);
5253
5254   if (TREE_CODE (base) == SSA_NAME)
5255     {
5256       gimple *stmt = SSA_NAME_DEF_STMT (base);
5257
5258       if (gimple_code (stmt) != GIMPLE_ASSIGN)
5259         return false;
5260
5261       code = gimple_assign_rhs_code (stmt);
5262       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5263         return false;
5264
5265       e1 = gimple_assign_rhs1 (stmt);
5266       e2 = gimple_assign_rhs2 (stmt);
5267     }
5268   else
5269     {
5270       code = TREE_CODE (base);
5271       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5272         return false;
5273       e1 = TREE_OPERAND (base, 0);
5274       e2 = TREE_OPERAND (base, 1);
5275     }
5276
5277   /* Use affine expansion as deeper inspection to prove the equality.  */
5278   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5279                                   &aff_e2, &data->name_expansion_cache);
5280   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5281                                   &aff_offset, &data->name_expansion_cache);
5282   aff_combination_scale (&aff_offset, -1);
5283   switch (code)
5284     {
5285     case PLUS_EXPR:
5286       aff_combination_add (&aff_e2, &aff_offset);
5287       if (aff_combination_zero_p (&aff_e2))
5288         return true;
5289
5290       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5291                                       &aff_e1, &data->name_expansion_cache);
5292       aff_combination_add (&aff_e1, &aff_offset);
5293       return aff_combination_zero_p (&aff_e1);
5294
5295     case POINTER_PLUS_EXPR:
5296       aff_combination_add (&aff_e2, &aff_offset);
5297       return aff_combination_zero_p (&aff_e2);
5298
5299     default:
5300       return false;
5301     }
5302 }
5303
5304 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5305    comparison with CAND.  NITER describes the number of iterations of
5306    the loops.  If successful, the comparison in COMP_P is altered accordingly.
5307
5308    We aim to handle the following situation:
5309
5310    sometype *base, *p;
5311    int a, b, i;
5312
5313    i = a;
5314    p = p_0 = base + a;
5315
5316    do
5317      {
5318        bla (*p);
5319        p++;
5320        i++;
5321      }
5322    while (i < b);
5323
5324    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5325    We aim to optimize this to
5326
5327    p = p_0 = base + a;
5328    do
5329      {
5330        bla (*p);
5331        p++;
5332      }
5333    while (p < p_0 - a + b);
5334
5335    This preserves the correctness, since the pointer arithmetics does not
5336    overflow.  More precisely:
5337
5338    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5339       overflow in computing it or the values of p.
5340    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5341       overflow.  To prove this, we use the fact that p_0 = base + a.  */
5342
5343 static bool
5344 iv_elimination_compare_lt (struct ivopts_data *data,
5345                            struct iv_cand *cand, enum tree_code *comp_p,
5346                            struct tree_niter_desc *niter)
5347 {
5348   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5349   struct aff_tree nit, tmpa, tmpb;
5350   enum tree_code comp;
5351   HOST_WIDE_INT step;
5352
5353   /* We need to know that the candidate induction variable does not overflow.
5354      While more complex analysis may be used to prove this, for now just
5355      check that the variable appears in the original program and that it
5356      is computed in a type that guarantees no overflows.  */
5357   cand_type = TREE_TYPE (cand->iv->base);
5358   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5359     return false;
5360
5361   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5362      the calculation of the BOUND could overflow, making the comparison
5363      invalid.  */
5364   if (!data->loop_single_exit_p)
5365     return false;
5366
5367   /* We need to be able to decide whether candidate is increasing or decreasing
5368      in order to choose the right comparison operator.  */
5369   if (!cst_and_fits_in_hwi (cand->iv->step))
5370     return false;
5371   step = int_cst_value (cand->iv->step);
5372
5373   /* Check that the number of iterations matches the expected pattern:
5374      a + 1 > b ? 0 : b - a - 1.  */
5375   mbz = niter->may_be_zero;
5376   if (TREE_CODE (mbz) == GT_EXPR)
5377     {
5378       /* Handle a + 1 > b.  */
5379       tree op0 = TREE_OPERAND (mbz, 0);
5380       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5381         {
5382           a = TREE_OPERAND (op0, 0);
5383           b = TREE_OPERAND (mbz, 1);
5384         }
5385       else
5386         return false;
5387     }
5388   else if (TREE_CODE (mbz) == LT_EXPR)
5389     {
5390       tree op1 = TREE_OPERAND (mbz, 1);
5391
5392       /* Handle b < a + 1.  */
5393       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5394         {
5395           a = TREE_OPERAND (op1, 0);
5396           b = TREE_OPERAND (mbz, 0);
5397         }
5398       else
5399         return false;
5400     }
5401   else
5402     return false;
5403
5404   /* Expected number of iterations is B - A - 1.  Check that it matches
5405      the actual number, i.e., that B - A - NITER = 1.  */
5406   tree_to_aff_combination (niter->niter, nit_type, &nit);
5407   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5408   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5409   aff_combination_scale (&nit, -1);
5410   aff_combination_scale (&tmpa, -1);
5411   aff_combination_add (&tmpb, &tmpa);
5412   aff_combination_add (&tmpb, &nit);
5413   if (tmpb.n != 0 || tmpb.offset != 1)
5414     return false;
5415
5416   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5417      overflow.  */
5418   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5419                         cand->iv->step,
5420                         fold_convert (TREE_TYPE (cand->iv->step), a));
5421   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5422     return false;
5423
5424   /* Determine the new comparison operator.  */
5425   comp = step < 0 ? GT_EXPR : LT_EXPR;
5426   if (*comp_p == NE_EXPR)
5427     *comp_p = comp;
5428   else if (*comp_p == EQ_EXPR)
5429     *comp_p = invert_tree_comparison (comp, false);
5430   else
5431     gcc_unreachable ();
5432
5433   return true;
5434 }
5435
5436 /* Check whether it is possible to express the condition in USE by comparison
5437    of candidate CAND.  If so, store the value compared with to BOUND, and the
5438    comparison operator to COMP.  */
5439
5440 static bool
5441 may_eliminate_iv (struct ivopts_data *data,
5442                   struct iv_use *use, struct iv_cand *cand, tree *bound,
5443                   enum tree_code *comp)
5444 {
5445   basic_block ex_bb;
5446   edge exit;
5447   tree period;
5448   struct loop *loop = data->current_loop;
5449   aff_tree bnd;
5450   struct tree_niter_desc *desc = NULL;
5451
5452   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5453     return false;
5454
5455   /* For now works only for exits that dominate the loop latch.
5456      TODO: extend to other conditions inside loop body.  */
5457   ex_bb = gimple_bb (use->stmt);
5458   if (use->stmt != last_stmt (ex_bb)
5459       || gimple_code (use->stmt) != GIMPLE_COND
5460       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5461     return false;
5462
5463   exit = EDGE_SUCC (ex_bb, 0);
5464   if (flow_bb_inside_loop_p (loop, exit->dest))
5465     exit = EDGE_SUCC (ex_bb, 1);
5466   if (flow_bb_inside_loop_p (loop, exit->dest))
5467     return false;
5468
5469   desc = niter_for_exit (data, exit);
5470   if (!desc)
5471     return false;
5472
5473   /* Determine whether we can use the variable to test the exit condition.
5474      This is the case iff the period of the induction variable is greater
5475      than the number of iterations for which the exit condition is true.  */
5476   period = iv_period (cand->iv);
5477
5478   /* If the number of iterations is constant, compare against it directly.  */
5479   if (TREE_CODE (desc->niter) == INTEGER_CST)
5480     {
5481       /* See cand_value_at.  */
5482       if (stmt_after_increment (loop, cand, use->stmt))
5483         {
5484           if (!tree_int_cst_lt (desc->niter, period))
5485             return false;
5486         }
5487       else
5488         {
5489           if (tree_int_cst_lt (period, desc->niter))
5490             return false;
5491         }
5492     }
5493
5494   /* If not, and if this is the only possible exit of the loop, see whether
5495      we can get a conservative estimate on the number of iterations of the
5496      entire loop and compare against that instead.  */
5497   else
5498     {
5499       widest_int period_value, max_niter;
5500
5501       max_niter = desc->max;
5502       if (stmt_after_increment (loop, cand, use->stmt))
5503         max_niter += 1;
5504       period_value = wi::to_widest (period);
5505       if (wi::gtu_p (max_niter, period_value))
5506         {
5507           /* See if we can take advantage of inferred loop bound
5508              information.  */
5509           if (data->loop_single_exit_p)
5510             {
5511               if (!max_loop_iterations (loop, &max_niter))
5512                 return false;
5513               /* The loop bound is already adjusted by adding 1.  */
5514               if (wi::gtu_p (max_niter, period_value))
5515                 return false;
5516             }
5517           else
5518             return false;
5519         }
5520     }
5521
5522   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5523
5524   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5525                          aff_combination_to_tree (&bnd));
5526   *comp = iv_elimination_compare (data, use);
5527
5528   /* It is unlikely that computing the number of iterations using division
5529      would be more profitable than keeping the original induction variable.  */
5530   if (expression_expensive_p (*bound))
5531     return false;
5532
5533   /* Sometimes, it is possible to handle the situation that the number of
5534      iterations may be zero unless additional assumptions by using <
5535      instead of != in the exit condition.
5536
5537      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5538            base the exit condition on it.  However, that is often too
5539            expensive.  */
5540   if (!integer_zerop (desc->may_be_zero))
5541     return iv_elimination_compare_lt (data, cand, comp, desc);
5542
5543   return true;
5544 }
5545
5546  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5547     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5548
5549 static int
5550 parm_decl_cost (struct ivopts_data *data, tree bound)
5551 {
5552   tree sbound = bound;
5553   STRIP_NOPS (sbound);
5554
5555   if (TREE_CODE (sbound) == SSA_NAME
5556       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5557       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5558       && data->body_includes_call)
5559     return COSTS_N_INSNS (1);
5560
5561   return 0;
5562 }
5563
5564 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5565
5566 static bool
5567 determine_group_iv_cost_cond (struct ivopts_data *data,
5568                               struct iv_group *group, struct iv_cand *cand)
5569 {
5570   tree bound = NULL_TREE;
5571   struct iv *cmp_iv;
5572   bitmap inv_exprs = NULL;
5573   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5574   comp_cost elim_cost, express_cost, cost, bound_cost;
5575   bool ok;
5576   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5577   tree *control_var, *bound_cst;
5578   enum tree_code comp = ERROR_MARK;
5579   struct iv_use *use = group->vuses[0];
5580
5581   /* Try iv elimination.  */
5582   if (may_eliminate_iv (data, use, cand, &bound, &comp))
5583     {
5584       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5585       if (elim_cost.cost == 0)
5586         elim_cost.cost = parm_decl_cost (data, bound);
5587       else if (TREE_CODE (bound) == INTEGER_CST)
5588         elim_cost.cost = 0;
5589       /* If we replace a loop condition 'i < n' with 'p < base + n',
5590          inv_vars_elim will have 'base' and 'n' set, which implies that both
5591          'base' and 'n' will be live during the loop.    More likely,
5592          'base + n' will be loop invariant, resulting in only one live value
5593          during the loop.  So in that case we clear inv_vars_elim and set
5594          inv_expr_elim instead.  */
5595       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5596         {
5597           inv_expr_elim = record_inv_expr (data, bound);
5598           bitmap_clear (inv_vars_elim);
5599         }
5600       /* The bound is a loop invariant, so it will be only computed
5601          once.  */
5602       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5603     }
5604   else
5605     elim_cost = infinite_cost;
5606
5607   /* Try expressing the original giv.  If it is compared with an invariant,
5608      note that we cannot get rid of it.  */
5609   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5610                               NULL, &cmp_iv);
5611   gcc_assert (ok);
5612
5613   /* When the condition is a comparison of the candidate IV against
5614      zero, prefer this IV.
5615
5616      TODO: The constant that we're subtracting from the cost should
5617      be target-dependent.  This information should be added to the
5618      target costs for each backend.  */
5619   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5620       && integer_zerop (*bound_cst)
5621       && (operand_equal_p (*control_var, cand->var_after, 0)
5622           || operand_equal_p (*control_var, cand->var_before, 0)))
5623     elim_cost -= 1;
5624
5625   express_cost = get_computation_cost (data, use, cand, false,
5626                                        &inv_vars_express, NULL,
5627                                        &inv_expr_express);
5628   if (cmp_iv != NULL)
5629     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5630
5631   /* Count the cost of the original bound as well.  */
5632   bound_cost = force_var_cost (data, *bound_cst, NULL);
5633   if (bound_cost.cost == 0)
5634     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5635   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5636     bound_cost.cost = 0;
5637   express_cost += bound_cost;
5638
5639   /* Choose the better approach, preferring the eliminated IV. */
5640   if (elim_cost <= express_cost)
5641     {
5642       cost = elim_cost;
5643       inv_vars = inv_vars_elim;
5644       inv_vars_elim = NULL;
5645       inv_expr = inv_expr_elim;
5646     }
5647   else
5648     {
5649       cost = express_cost;
5650       inv_vars = inv_vars_express;
5651       inv_vars_express = NULL;
5652       bound = NULL_TREE;
5653       comp = ERROR_MARK;
5654       inv_expr = inv_expr_express;
5655     }
5656
5657   if (inv_expr)
5658     {
5659       inv_exprs = BITMAP_ALLOC (NULL);
5660       bitmap_set_bit (inv_exprs, inv_expr->id);
5661     }
5662   set_group_iv_cost (data, group, cand, cost,
5663                      inv_vars, bound, comp, inv_exprs);
5664
5665   if (inv_vars_elim)
5666     BITMAP_FREE (inv_vars_elim);
5667   if (inv_vars_express)
5668     BITMAP_FREE (inv_vars_express);
5669
5670   return !cost.infinite_cost_p ();
5671 }
5672
5673 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5674    if USE cannot be represented with CAND.  */
5675
5676 static bool
5677 determine_group_iv_cost (struct ivopts_data *data,
5678                          struct iv_group *group, struct iv_cand *cand)
5679 {
5680   switch (group->type)
5681     {
5682     case USE_NONLINEAR_EXPR:
5683       return determine_group_iv_cost_generic (data, group, cand);
5684
5685     case USE_ADDRESS:
5686       return determine_group_iv_cost_address (data, group, cand);
5687
5688     case USE_COMPARE:
5689       return determine_group_iv_cost_cond (data, group, cand);
5690
5691     default:
5692       gcc_unreachable ();
5693     }
5694 }
5695
5696 /* Return true if get_computation_cost indicates that autoincrement is
5697    a possibility for the pair of USE and CAND, false otherwise.  */
5698
5699 static bool
5700 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5701                            struct iv_cand *cand)
5702 {
5703   bitmap inv_vars;
5704   bool can_autoinc;
5705   comp_cost cost;
5706
5707   if (use->type != USE_ADDRESS)
5708     return false;
5709
5710   cost = get_computation_cost (data, use, cand, true, &inv_vars,
5711                                &can_autoinc, NULL);
5712
5713   BITMAP_FREE (inv_vars);
5714
5715   return !cost.infinite_cost_p () && can_autoinc;
5716 }
5717
5718 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5719    use that allows autoincrement, and set their AINC_USE if possible.  */
5720
5721 static void
5722 set_autoinc_for_original_candidates (struct ivopts_data *data)
5723 {
5724   unsigned i, j;
5725
5726   for (i = 0; i < data->vcands.length (); i++)
5727     {
5728       struct iv_cand *cand = data->vcands[i];
5729       struct iv_use *closest_before = NULL;
5730       struct iv_use *closest_after = NULL;
5731       if (cand->pos != IP_ORIGINAL)
5732         continue;
5733
5734       for (j = 0; j < data->vgroups.length (); j++)
5735         {
5736           struct iv_group *group = data->vgroups[j];
5737           struct iv_use *use = group->vuses[0];
5738           unsigned uid = gimple_uid (use->stmt);
5739
5740           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5741             continue;
5742
5743           if (uid < gimple_uid (cand->incremented_at)
5744               && (closest_before == NULL
5745                   || uid > gimple_uid (closest_before->stmt)))
5746             closest_before = use;
5747
5748           if (uid > gimple_uid (cand->incremented_at)
5749               && (closest_after == NULL
5750                   || uid < gimple_uid (closest_after->stmt)))
5751             closest_after = use;
5752         }
5753
5754       if (closest_before != NULL
5755           && autoinc_possible_for_pair (data, closest_before, cand))
5756         cand->ainc_use = closest_before;
5757       else if (closest_after != NULL
5758                && autoinc_possible_for_pair (data, closest_after, cand))
5759         cand->ainc_use = closest_after;
5760     }
5761 }
5762
5763 /* Finds the candidates for the induction variables.  */
5764
5765 static void
5766 find_iv_candidates (struct ivopts_data *data)
5767 {
5768   /* Add commonly used ivs.  */
5769   add_standard_iv_candidates (data);
5770
5771   /* Add old induction variables.  */
5772   add_iv_candidate_for_bivs (data);
5773
5774   /* Add induction variables derived from uses.  */
5775   add_iv_candidate_for_groups (data);
5776
5777   set_autoinc_for_original_candidates (data);
5778
5779   /* Record the important candidates.  */
5780   record_important_candidates (data);
5781
5782   if (dump_file && (dump_flags & TDF_DETAILS))
5783     {
5784       unsigned i;
5785
5786       fprintf (dump_file, "\n<Important Candidates>:\t");
5787       for (i = 0; i < data->vcands.length (); i++)
5788         if (data->vcands[i]->important)
5789           fprintf (dump_file, " %d,", data->vcands[i]->id);
5790       fprintf (dump_file, "\n");
5791
5792       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5793       for (i = 0; i < data->vgroups.length (); i++)
5794         {
5795           struct iv_group *group = data->vgroups[i];
5796
5797           if (group->related_cands)
5798             {
5799               fprintf (dump_file, "  Group %d:\t", group->id);
5800               dump_bitmap (dump_file, group->related_cands);
5801             }
5802         }
5803       fprintf (dump_file, "\n");
5804     }
5805 }
5806
5807 /* Determines costs of computing use of iv with an iv candidate.  */
5808
5809 static void
5810 determine_group_iv_costs (struct ivopts_data *data)
5811 {
5812   unsigned i, j;
5813   struct iv_cand *cand;
5814   struct iv_group *group;
5815   bitmap to_clear = BITMAP_ALLOC (NULL);
5816
5817   alloc_use_cost_map (data);
5818
5819   for (i = 0; i < data->vgroups.length (); i++)
5820     {
5821       group = data->vgroups[i];
5822
5823       if (data->consider_all_candidates)
5824         {
5825           for (j = 0; j < data->vcands.length (); j++)
5826             {
5827               cand = data->vcands[j];
5828               determine_group_iv_cost (data, group, cand);
5829             }
5830         }
5831       else
5832         {
5833           bitmap_iterator bi;
5834
5835           EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5836             {
5837               cand = data->vcands[j];
5838               if (!determine_group_iv_cost (data, group, cand))
5839                 bitmap_set_bit (to_clear, j);
5840             }
5841
5842           /* Remove the candidates for that the cost is infinite from
5843              the list of related candidates.  */
5844           bitmap_and_compl_into (group->related_cands, to_clear);
5845           bitmap_clear (to_clear);
5846         }
5847     }
5848
5849   BITMAP_FREE (to_clear);
5850
5851   if (dump_file && (dump_flags & TDF_DETAILS))
5852     {
5853       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5854       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5855
5856       for (hash_table<iv_inv_expr_hasher>::iterator it
5857            = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5858            ++it)
5859         list.safe_push (*it);
5860
5861       list.qsort (sort_iv_inv_expr_ent);
5862
5863       for (i = 0; i < list.length (); ++i)
5864         {
5865           fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5866           print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5867           fprintf (dump_file, "\n");
5868         }
5869
5870       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5871
5872       for (i = 0; i < data->vgroups.length (); i++)
5873         {
5874           group = data->vgroups[i];
5875
5876           fprintf (dump_file, "Group %d:\n", i);
5877           fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5878           for (j = 0; j < group->n_map_members; j++)
5879             {
5880               if (!group->cost_map[j].cand
5881                   || group->cost_map[j].cost.infinite_cost_p ())
5882                 continue;
5883
5884               fprintf (dump_file, "  %d\t%d\t%d\t",
5885                        group->cost_map[j].cand->id,
5886                        group->cost_map[j].cost.cost,
5887                        group->cost_map[j].cost.complexity);
5888               if (!group->cost_map[j].inv_exprs
5889                   || bitmap_empty_p (group->cost_map[j].inv_exprs))
5890                 fprintf (dump_file, "NIL;\t");
5891               else
5892                 bitmap_print (dump_file,
5893                               group->cost_map[j].inv_exprs, "", ";\t");
5894               if (!group->cost_map[j].inv_vars
5895                   || bitmap_empty_p (group->cost_map[j].inv_vars))
5896                 fprintf (dump_file, "NIL;\n");
5897               else
5898                 bitmap_print (dump_file,
5899                               group->cost_map[j].inv_vars, "", "\n");
5900             }
5901
5902           fprintf (dump_file, "\n");
5903         }
5904       fprintf (dump_file, "\n");
5905     }
5906 }
5907
5908 /* Determines cost of the candidate CAND.  */
5909
5910 static void
5911 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5912 {
5913   comp_cost cost_base;
5914   unsigned cost, cost_step;
5915   tree base;
5916
5917   gcc_assert (cand->iv != NULL);
5918
5919   /* There are two costs associated with the candidate -- its increment
5920      and its initialization.  The second is almost negligible for any loop
5921      that rolls enough, so we take it just very little into account.  */
5922
5923   base = cand->iv->base;
5924   cost_base = force_var_cost (data, base, NULL);
5925   /* It will be exceptional that the iv register happens to be initialized with
5926      the proper value at no cost.  In general, there will at least be a regcopy
5927      or a const set.  */
5928   if (cost_base.cost == 0)
5929     cost_base.cost = COSTS_N_INSNS (1);
5930   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5931
5932   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5933
5934   /* Prefer the original ivs unless we may gain something by replacing it.
5935      The reason is to make debugging simpler; so this is not relevant for
5936      artificial ivs created by other optimization passes.  */
5937   if (cand->pos != IP_ORIGINAL
5938       || !SSA_NAME_VAR (cand->var_before)
5939       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5940     cost++;
5941
5942   /* Prefer not to insert statements into latch unless there are some
5943      already (so that we do not create unnecessary jumps).  */
5944   if (cand->pos == IP_END
5945       && empty_block_p (ip_end_pos (data->current_loop)))
5946     cost++;
5947
5948   cand->cost = cost;
5949   cand->cost_step = cost_step;
5950 }
5951
5952 /* Determines costs of computation of the candidates.  */
5953
5954 static void
5955 determine_iv_costs (struct ivopts_data *data)
5956 {
5957   unsigned i;
5958
5959   if (dump_file && (dump_flags & TDF_DETAILS))
5960     {
5961       fprintf (dump_file, "<Candidate Costs>:\n");
5962       fprintf (dump_file, "  cand\tcost\n");
5963     }
5964
5965   for (i = 0; i < data->vcands.length (); i++)
5966     {
5967       struct iv_cand *cand = data->vcands[i];
5968
5969       determine_iv_cost (data, cand);
5970
5971       if (dump_file && (dump_flags & TDF_DETAILS))
5972         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5973     }
5974
5975   if (dump_file && (dump_flags & TDF_DETAILS))
5976     fprintf (dump_file, "\n");
5977 }
5978
5979 /* Calculates cost for having N_REGS registers.  This number includes
5980    induction variables, invariant variables and invariant expressions.  */
5981
5982 static unsigned
5983 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned n_regs)
5984 {
5985   unsigned cost = estimate_reg_pressure_cost (n_regs,
5986                                               data->regs_used, data->speed,
5987                                               data->body_includes_call);
5988   /* Add n_regs to the cost, so that we prefer eliminating ivs if possible.  */
5989   return n_regs + cost;
5990 }
5991
5992 /* For each size of the induction variable set determine the penalty.  */
5993
5994 static void
5995 determine_set_costs (struct ivopts_data *data)
5996 {
5997   unsigned j, n;
5998   gphi *phi;
5999   gphi_iterator psi;
6000   tree op;
6001   struct loop *loop = data->current_loop;
6002   bitmap_iterator bi;
6003
6004   if (dump_file && (dump_flags & TDF_DETAILS))
6005     {
6006       fprintf (dump_file, "<Global Costs>:\n");
6007       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
6008       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
6009       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
6010       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
6011     }
6012
6013   n = 0;
6014   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6015     {
6016       phi = psi.phi ();
6017       op = PHI_RESULT (phi);
6018
6019       if (virtual_operand_p (op))
6020         continue;
6021
6022       if (get_iv (data, op))
6023         continue;
6024
6025       n++;
6026     }
6027
6028   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6029     {
6030       struct version_info *info = ver_info (data, j);
6031
6032       if (info->inv_id && info->has_nonlin_use)
6033         n++;
6034     }
6035
6036   data->regs_used = n;
6037   if (dump_file && (dump_flags & TDF_DETAILS))
6038     fprintf (dump_file, "  regs_used %d\n", n);
6039
6040   if (dump_file && (dump_flags & TDF_DETAILS))
6041     {
6042       fprintf (dump_file, "  cost for size:\n");
6043       fprintf (dump_file, "  ivs\tcost\n");
6044       for (j = 0; j <= 2 * target_avail_regs; j++)
6045         fprintf (dump_file, "  %d\t%d\n", j,
6046                  ivopts_global_cost_for_size (data, j));
6047       fprintf (dump_file, "\n");
6048     }
6049 }
6050
6051 /* Returns true if A is a cheaper cost pair than B.  */
6052
6053 static bool
6054 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
6055 {
6056   if (!a)
6057     return false;
6058
6059   if (!b)
6060     return true;
6061
6062   if (a->cost < b->cost)
6063     return true;
6064
6065   if (b->cost < a->cost)
6066     return false;
6067
6068   /* In case the costs are the same, prefer the cheaper candidate.  */
6069   if (a->cand->cost < b->cand->cost)
6070     return true;
6071
6072   return false;
6073 }
6074
6075
6076 /* Returns candidate by that USE is expressed in IVS.  */
6077
6078 static struct cost_pair *
6079 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
6080 {
6081   return ivs->cand_for_group[group->id];
6082 }
6083
6084 /* Computes the cost field of IVS structure.  */
6085
6086 static void
6087 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
6088 {
6089   comp_cost cost = ivs->cand_use_cost;
6090
6091   cost += ivs->cand_cost;
6092   cost += ivopts_global_cost_for_size (data, ivs->n_invs + ivs->n_cands);
6093   ivs->cost = cost;
6094 }
6095
6096 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6097    and IVS.  */
6098
6099 static void
6100 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6101 {
6102   bitmap_iterator bi;
6103   unsigned iid;
6104
6105   if (!invs)
6106     return;
6107
6108   gcc_assert (n_inv_uses != NULL);
6109   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6110     {
6111       n_inv_uses[iid]--;
6112       if (n_inv_uses[iid] == 0)
6113         ivs->n_invs--;
6114     }
6115 }
6116
6117 /* Set USE not to be expressed by any candidate in IVS.  */
6118
6119 static void
6120 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6121                  struct iv_group *group)
6122 {
6123   unsigned gid = group->id, cid;
6124   struct cost_pair *cp;
6125
6126   cp = ivs->cand_for_group[gid];
6127   if (!cp)
6128     return;
6129   cid = cp->cand->id;
6130
6131   ivs->bad_groups++;
6132   ivs->cand_for_group[gid] = NULL;
6133   ivs->n_cand_uses[cid]--;
6134
6135   if (ivs->n_cand_uses[cid] == 0)
6136     {
6137       bitmap_clear_bit (ivs->cands, cid);
6138       ivs->n_cands--;
6139       ivs->cand_cost -= cp->cand->cost;
6140       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6141     }
6142
6143   ivs->cand_use_cost -= cp->cost;
6144   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6145   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6146   iv_ca_recount_cost (data, ivs);
6147 }
6148
6149 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6150    IVS.  */
6151
6152 static void
6153 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6154 {
6155   bitmap_iterator bi;
6156   unsigned iid;
6157
6158   if (!invs)
6159     return;
6160
6161   gcc_assert (n_inv_uses != NULL);
6162   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6163     {
6164       n_inv_uses[iid]++;
6165       if (n_inv_uses[iid] == 1)
6166         ivs->n_invs++;
6167     }
6168 }
6169
6170 /* Set cost pair for GROUP in set IVS to CP.  */
6171
6172 static void
6173 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6174               struct iv_group *group, struct cost_pair *cp)
6175 {
6176   unsigned gid = group->id, cid;
6177
6178   if (ivs->cand_for_group[gid] == cp)
6179     return;
6180
6181   if (ivs->cand_for_group[gid])
6182     iv_ca_set_no_cp (data, ivs, group);
6183
6184   if (cp)
6185     {
6186       cid = cp->cand->id;
6187
6188       ivs->bad_groups--;
6189       ivs->cand_for_group[gid] = cp;
6190       ivs->n_cand_uses[cid]++;
6191       if (ivs->n_cand_uses[cid] == 1)
6192         {
6193           bitmap_set_bit (ivs->cands, cid);
6194           ivs->n_cands++;
6195           ivs->cand_cost += cp->cand->cost;
6196           iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6197         }
6198
6199       ivs->cand_use_cost += cp->cost;
6200       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6201       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6202       iv_ca_recount_cost (data, ivs);
6203     }
6204 }
6205
6206 /* Extend set IVS by expressing USE by some of the candidates in it
6207    if possible.  Consider all important candidates if candidates in
6208    set IVS don't give any result.  */
6209
6210 static void
6211 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
6212                struct iv_group *group)
6213 {
6214   struct cost_pair *best_cp = NULL, *cp;
6215   bitmap_iterator bi;
6216   unsigned i;
6217   struct iv_cand *cand;
6218
6219   gcc_assert (ivs->upto >= group->id);
6220   ivs->upto++;
6221   ivs->bad_groups++;
6222
6223   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6224     {
6225       cand = data->vcands[i];
6226       cp = get_group_iv_cost (data, group, cand);
6227       if (cheaper_cost_pair (cp, best_cp))
6228         best_cp = cp;
6229     }
6230
6231   if (best_cp == NULL)
6232     {
6233       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6234         {
6235           cand = data->vcands[i];
6236           cp = get_group_iv_cost (data, group, cand);
6237           if (cheaper_cost_pair (cp, best_cp))
6238             best_cp = cp;
6239         }
6240     }
6241
6242   iv_ca_set_cp (data, ivs, group, best_cp);
6243 }
6244
6245 /* Get cost for assignment IVS.  */
6246
6247 static comp_cost
6248 iv_ca_cost (struct iv_ca *ivs)
6249 {
6250   /* This was a conditional expression but it triggered a bug in
6251      Sun C 5.5.  */
6252   if (ivs->bad_groups)
6253     return infinite_cost;
6254   else
6255     return ivs->cost;
6256 }
6257
6258 /* Returns true if all dependences of CP are among invariants in IVS.  */
6259
6260 static bool
6261 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6262 {
6263   unsigned i;
6264   bitmap_iterator bi;
6265
6266   if (cp->inv_vars)
6267     EXECUTE_IF_SET_IN_BITMAP (cp->inv_vars, 0, i, bi)
6268       if (ivs->n_inv_var_uses[i] == 0)
6269         return false;
6270
6271   if (cp->inv_exprs)
6272     EXECUTE_IF_SET_IN_BITMAP (cp->inv_exprs, 0, i, bi)
6273       if (ivs->n_inv_expr_uses[i] == 0)
6274         return false;
6275
6276   return true;
6277 }
6278
6279 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6280    it before NEXT.  */
6281
6282 static struct iv_ca_delta *
6283 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
6284                  struct cost_pair *new_cp, struct iv_ca_delta *next)
6285 {
6286   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6287
6288   change->group = group;
6289   change->old_cp = old_cp;
6290   change->new_cp = new_cp;
6291   change->next = next;
6292
6293   return change;
6294 }
6295
6296 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6297    are rewritten.  */
6298
6299 static struct iv_ca_delta *
6300 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6301 {
6302   struct iv_ca_delta *last;
6303
6304   if (!l2)
6305     return l1;
6306
6307   if (!l1)
6308     return l2;
6309
6310   for (last = l1; last->next; last = last->next)
6311     continue;
6312   last->next = l2;
6313
6314   return l1;
6315 }
6316
6317 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6318
6319 static struct iv_ca_delta *
6320 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6321 {
6322   struct iv_ca_delta *act, *next, *prev = NULL;
6323
6324   for (act = delta; act; act = next)
6325     {
6326       next = act->next;
6327       act->next = prev;
6328       prev = act;
6329
6330       std::swap (act->old_cp, act->new_cp);
6331     }
6332
6333   return prev;
6334 }
6335
6336 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6337    reverted instead.  */
6338
6339 static void
6340 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6341                     struct iv_ca_delta *delta, bool forward)
6342 {
6343   struct cost_pair *from, *to;
6344   struct iv_ca_delta *act;
6345
6346   if (!forward)
6347     delta = iv_ca_delta_reverse (delta);
6348
6349   for (act = delta; act; act = act->next)
6350     {
6351       from = act->old_cp;
6352       to = act->new_cp;
6353       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6354       iv_ca_set_cp (data, ivs, act->group, to);
6355     }
6356
6357   if (!forward)
6358     iv_ca_delta_reverse (delta);
6359 }
6360
6361 /* Returns true if CAND is used in IVS.  */
6362
6363 static bool
6364 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6365 {
6366   return ivs->n_cand_uses[cand->id] > 0;
6367 }
6368
6369 /* Returns number of induction variable candidates in the set IVS.  */
6370
6371 static unsigned
6372 iv_ca_n_cands (struct iv_ca *ivs)
6373 {
6374   return ivs->n_cands;
6375 }
6376
6377 /* Free the list of changes DELTA.  */
6378
6379 static void
6380 iv_ca_delta_free (struct iv_ca_delta **delta)
6381 {
6382   struct iv_ca_delta *act, *next;
6383
6384   for (act = *delta; act; act = next)
6385     {
6386       next = act->next;
6387       free (act);
6388     }
6389
6390   *delta = NULL;
6391 }
6392
6393 /* Allocates new iv candidates assignment.  */
6394
6395 static struct iv_ca *
6396 iv_ca_new (struct ivopts_data *data)
6397 {
6398   struct iv_ca *nw = XNEW (struct iv_ca);
6399
6400   nw->upto = 0;
6401   nw->bad_groups = 0;
6402   nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6403                                  data->vgroups.length ());
6404   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6405   nw->cands = BITMAP_ALLOC (NULL);
6406   nw->n_cands = 0;
6407   nw->n_invs = 0;
6408   nw->cand_use_cost = no_cost;
6409   nw->cand_cost = 0;
6410   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6411   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6412   nw->cost = no_cost;
6413
6414   return nw;
6415 }
6416
6417 /* Free memory occupied by the set IVS.  */
6418
6419 static void
6420 iv_ca_free (struct iv_ca **ivs)
6421 {
6422   free ((*ivs)->cand_for_group);
6423   free ((*ivs)->n_cand_uses);
6424   BITMAP_FREE ((*ivs)->cands);
6425   free ((*ivs)->n_inv_var_uses);
6426   free ((*ivs)->n_inv_expr_uses);
6427   free (*ivs);
6428   *ivs = NULL;
6429 }
6430
6431 /* Dumps IVS to FILE.  */
6432
6433 static void
6434 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6435 {
6436   unsigned i;
6437   comp_cost cost = iv_ca_cost (ivs);
6438
6439   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
6440            cost.complexity);
6441   fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
6442            ivs->cand_cost, ivs->cand_use_cost.cost,
6443            ivs->cand_use_cost.complexity);
6444   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6445
6446   for (i = 0; i < ivs->upto; i++)
6447     {
6448       struct iv_group *group = data->vgroups[i];
6449       struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6450       if (cp)
6451         fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6452                  group->id, cp->cand->id, cp->cost.cost,
6453                  cp->cost.complexity);
6454       else
6455         fprintf (file, "   group:%d --> ??\n", group->id);
6456     }
6457
6458   const char *pref = "";
6459   fprintf (file, "  invariant variables: ");
6460   for (i = 1; i <= data->max_inv_var_id; i++)
6461     if (ivs->n_inv_var_uses[i])
6462       {
6463         fprintf (file, "%s%d", pref, i);
6464         pref = ", ";
6465       }
6466
6467   pref = "";
6468   fprintf (file, "\n  invariant expressions: ");
6469   for (i = 1; i <= data->max_inv_expr_id; i++)
6470     if (ivs->n_inv_expr_uses[i])
6471       {
6472         fprintf (file, "%s%d", pref, i);
6473         pref = ", ";
6474       }
6475
6476   fprintf (file, "\n\n");
6477 }
6478
6479 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6480    new set, and store differences in DELTA.  Number of induction variables
6481    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6482    the function will try to find a solution with mimimal iv candidates.  */
6483
6484 static comp_cost
6485 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6486               struct iv_cand *cand, struct iv_ca_delta **delta,
6487               unsigned *n_ivs, bool min_ncand)
6488 {
6489   unsigned i;
6490   comp_cost cost;
6491   struct iv_group *group;
6492   struct cost_pair *old_cp, *new_cp;
6493
6494   *delta = NULL;
6495   for (i = 0; i < ivs->upto; i++)
6496     {
6497       group = data->vgroups[i];
6498       old_cp = iv_ca_cand_for_group (ivs, group);
6499
6500       if (old_cp
6501           && old_cp->cand == cand)
6502         continue;
6503
6504       new_cp = get_group_iv_cost (data, group, cand);
6505       if (!new_cp)
6506         continue;
6507
6508       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6509         continue;
6510
6511       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6512         continue;
6513
6514       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6515     }
6516
6517   iv_ca_delta_commit (data, ivs, *delta, true);
6518   cost = iv_ca_cost (ivs);
6519   if (n_ivs)
6520     *n_ivs = iv_ca_n_cands (ivs);
6521   iv_ca_delta_commit (data, ivs, *delta, false);
6522
6523   return cost;
6524 }
6525
6526 /* Try narrowing set IVS by removing CAND.  Return the cost of
6527    the new set and store the differences in DELTA.  START is
6528    the candidate with which we start narrowing.  */
6529
6530 static comp_cost
6531 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6532               struct iv_cand *cand, struct iv_cand *start,
6533               struct iv_ca_delta **delta)
6534 {
6535   unsigned i, ci;
6536   struct iv_group *group;
6537   struct cost_pair *old_cp, *new_cp, *cp;
6538   bitmap_iterator bi;
6539   struct iv_cand *cnd;
6540   comp_cost cost, best_cost, acost;
6541
6542   *delta = NULL;
6543   for (i = 0; i < data->vgroups.length (); i++)
6544     {
6545       group = data->vgroups[i];
6546
6547       old_cp = iv_ca_cand_for_group (ivs, group);
6548       if (old_cp->cand != cand)
6549         continue;
6550
6551       best_cost = iv_ca_cost (ivs);
6552       /* Start narrowing with START.  */
6553       new_cp = get_group_iv_cost (data, group, start);
6554
6555       if (data->consider_all_candidates)
6556         {
6557           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6558             {
6559               if (ci == cand->id || (start && ci == start->id))
6560                 continue;
6561
6562               cnd = data->vcands[ci];
6563
6564               cp = get_group_iv_cost (data, group, cnd);
6565               if (!cp)
6566                 continue;
6567
6568               iv_ca_set_cp (data, ivs, group, cp);
6569               acost = iv_ca_cost (ivs);
6570
6571               if (acost < best_cost)
6572                 {
6573                   best_cost = acost;
6574                   new_cp = cp;
6575                 }
6576             }
6577         }
6578       else
6579         {
6580           EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6581             {
6582               if (ci == cand->id || (start && ci == start->id))
6583                 continue;
6584
6585               cnd = data->vcands[ci];
6586
6587               cp = get_group_iv_cost (data, group, cnd);
6588               if (!cp)
6589                 continue;
6590
6591               iv_ca_set_cp (data, ivs, group, cp);
6592               acost = iv_ca_cost (ivs);
6593
6594               if (acost < best_cost)
6595                 {
6596                   best_cost = acost;
6597                   new_cp = cp;
6598                 }
6599             }
6600         }
6601       /* Restore to old cp for use.  */
6602       iv_ca_set_cp (data, ivs, group, old_cp);
6603
6604       if (!new_cp)
6605         {
6606           iv_ca_delta_free (delta);
6607           return infinite_cost;
6608         }
6609
6610       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6611     }
6612
6613   iv_ca_delta_commit (data, ivs, *delta, true);
6614   cost = iv_ca_cost (ivs);
6615   iv_ca_delta_commit (data, ivs, *delta, false);
6616
6617   return cost;
6618 }
6619
6620 /* Try optimizing the set of candidates IVS by removing candidates different
6621    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6622    differences in DELTA.  */
6623
6624 static comp_cost
6625 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6626              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6627 {
6628   bitmap_iterator bi;
6629   struct iv_ca_delta *act_delta, *best_delta;
6630   unsigned i;
6631   comp_cost best_cost, acost;
6632   struct iv_cand *cand;
6633
6634   best_delta = NULL;
6635   best_cost = iv_ca_cost (ivs);
6636
6637   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6638     {
6639       cand = data->vcands[i];
6640
6641       if (cand == except_cand)
6642         continue;
6643
6644       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6645
6646       if (acost < best_cost)
6647         {
6648           best_cost = acost;
6649           iv_ca_delta_free (&best_delta);
6650           best_delta = act_delta;
6651         }
6652       else
6653         iv_ca_delta_free (&act_delta);
6654     }
6655
6656   if (!best_delta)
6657     {
6658       *delta = NULL;
6659       return best_cost;
6660     }
6661
6662   /* Recurse to possibly remove other unnecessary ivs.  */
6663   iv_ca_delta_commit (data, ivs, best_delta, true);
6664   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6665   iv_ca_delta_commit (data, ivs, best_delta, false);
6666   *delta = iv_ca_delta_join (best_delta, *delta);
6667   return best_cost;
6668 }
6669
6670 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6671    cheaper local cost for GROUP than BEST_CP.  Return pointer to
6672    the corresponding cost_pair, otherwise just return BEST_CP.  */
6673
6674 static struct cost_pair*
6675 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6676                         unsigned int cand_idx, struct iv_cand *old_cand,
6677                         struct cost_pair *best_cp)
6678 {
6679   struct iv_cand *cand;
6680   struct cost_pair *cp;
6681
6682   gcc_assert (old_cand != NULL && best_cp != NULL);
6683   if (cand_idx == old_cand->id)
6684     return best_cp;
6685
6686   cand = data->vcands[cand_idx];
6687   cp = get_group_iv_cost (data, group, cand);
6688   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6689     return cp;
6690
6691   return best_cp;
6692 }
6693
6694 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6695    which are used by more than one iv uses.  For each of those candidates,
6696    this function tries to represent iv uses under that candidate using
6697    other ones with lower local cost, then tries to prune the new set.
6698    If the new set has lower cost, It returns the new cost after recording
6699    candidate replacement in list DELTA.  */
6700
6701 static comp_cost
6702 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6703                struct iv_ca_delta **delta)
6704 {
6705   bitmap_iterator bi, bj;
6706   unsigned int i, j, k;
6707   struct iv_cand *cand;
6708   comp_cost orig_cost, acost;
6709   struct iv_ca_delta *act_delta, *tmp_delta;
6710   struct cost_pair *old_cp, *best_cp = NULL;
6711
6712   *delta = NULL;
6713   orig_cost = iv_ca_cost (ivs);
6714
6715   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6716     {
6717       if (ivs->n_cand_uses[i] == 1
6718           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6719         continue;
6720
6721       cand = data->vcands[i];
6722
6723       act_delta = NULL;
6724       /*  Represent uses under current candidate using other ones with
6725           lower local cost.  */
6726       for (j = 0; j < ivs->upto; j++)
6727         {
6728           struct iv_group *group = data->vgroups[j];
6729           old_cp = iv_ca_cand_for_group (ivs, group);
6730
6731           if (old_cp->cand != cand)
6732             continue;
6733
6734           best_cp = old_cp;
6735           if (data->consider_all_candidates)
6736             for (k = 0; k < data->vcands.length (); k++)
6737               best_cp = cheaper_cost_with_cand (data, group, k,
6738                                                 old_cp->cand, best_cp);
6739           else
6740             EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6741               best_cp = cheaper_cost_with_cand (data, group, k,
6742                                                 old_cp->cand, best_cp);
6743
6744           if (best_cp == old_cp)
6745             continue;
6746
6747           act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6748         }
6749       /* No need for further prune.  */
6750       if (!act_delta)
6751         continue;
6752
6753       /* Prune the new candidate set.  */
6754       iv_ca_delta_commit (data, ivs, act_delta, true);
6755       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6756       iv_ca_delta_commit (data, ivs, act_delta, false);
6757       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6758
6759       if (acost < orig_cost)
6760         {
6761           *delta = act_delta;
6762           return acost;
6763         }
6764       else
6765         iv_ca_delta_free (&act_delta);
6766     }
6767
6768   return orig_cost;
6769 }
6770
6771 /* Tries to extend the sets IVS in the best possible way in order to
6772    express the GROUP.  If ORIGINALP is true, prefer candidates from
6773    the original set of IVs, otherwise favor important candidates not
6774    based on any memory object.  */
6775
6776 static bool
6777 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6778                   struct iv_group *group, bool originalp)
6779 {
6780   comp_cost best_cost, act_cost;
6781   unsigned i;
6782   bitmap_iterator bi;
6783   struct iv_cand *cand;
6784   struct iv_ca_delta *best_delta = NULL, *act_delta;
6785   struct cost_pair *cp;
6786
6787   iv_ca_add_group (data, ivs, group);
6788   best_cost = iv_ca_cost (ivs);
6789   cp = iv_ca_cand_for_group (ivs, group);
6790   if (cp)
6791     {
6792       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6793       iv_ca_set_no_cp (data, ivs, group);
6794     }
6795
6796   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6797      first try important candidates not based on any memory object.  Only if
6798      this fails, try the specific ones.  Rationale -- in loops with many
6799      variables the best choice often is to use just one generic biv.  If we
6800      added here many ivs specific to the uses, the optimization algorithm later
6801      would be likely to get stuck in a local minimum, thus causing us to create
6802      too many ivs.  The approach from few ivs to more seems more likely to be
6803      successful -- starting from few ivs, replacing an expensive use by a
6804      specific iv should always be a win.  */
6805   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6806     {
6807       cand = data->vcands[i];
6808
6809       if (originalp && cand->pos !=IP_ORIGINAL)
6810         continue;
6811
6812       if (!originalp && cand->iv->base_object != NULL_TREE)
6813         continue;
6814
6815       if (iv_ca_cand_used_p (ivs, cand))
6816         continue;
6817
6818       cp = get_group_iv_cost (data, group, cand);
6819       if (!cp)
6820         continue;
6821
6822       iv_ca_set_cp (data, ivs, group, cp);
6823       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6824                                true);
6825       iv_ca_set_no_cp (data, ivs, group);
6826       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6827
6828       if (act_cost < best_cost)
6829         {
6830           best_cost = act_cost;
6831
6832           iv_ca_delta_free (&best_delta);
6833           best_delta = act_delta;
6834         }
6835       else
6836         iv_ca_delta_free (&act_delta);
6837     }
6838
6839   if (best_cost.infinite_cost_p ())
6840     {
6841       for (i = 0; i < group->n_map_members; i++)
6842         {
6843           cp = group->cost_map + i;
6844           cand = cp->cand;
6845           if (!cand)
6846             continue;
6847
6848           /* Already tried this.  */
6849           if (cand->important)
6850             {
6851               if (originalp && cand->pos == IP_ORIGINAL)
6852                 continue;
6853               if (!originalp && cand->iv->base_object == NULL_TREE)
6854                 continue;
6855             }
6856
6857           if (iv_ca_cand_used_p (ivs, cand))
6858             continue;
6859
6860           act_delta = NULL;
6861           iv_ca_set_cp (data, ivs, group, cp);
6862           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6863           iv_ca_set_no_cp (data, ivs, group);
6864           act_delta = iv_ca_delta_add (group,
6865                                        iv_ca_cand_for_group (ivs, group),
6866                                        cp, act_delta);
6867
6868           if (act_cost < best_cost)
6869             {
6870               best_cost = act_cost;
6871
6872               if (best_delta)
6873                 iv_ca_delta_free (&best_delta);
6874               best_delta = act_delta;
6875             }
6876           else
6877             iv_ca_delta_free (&act_delta);
6878         }
6879     }
6880
6881   iv_ca_delta_commit (data, ivs, best_delta, true);
6882   iv_ca_delta_free (&best_delta);
6883
6884   return !best_cost.infinite_cost_p ();
6885 }
6886
6887 /* Finds an initial assignment of candidates to uses.  */
6888
6889 static struct iv_ca *
6890 get_initial_solution (struct ivopts_data *data, bool originalp)
6891 {
6892   unsigned i;
6893   struct iv_ca *ivs = iv_ca_new (data);
6894
6895   for (i = 0; i < data->vgroups.length (); i++)
6896     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6897       {
6898         iv_ca_free (&ivs);
6899         return NULL;
6900       }
6901
6902   return ivs;
6903 }
6904
6905 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6906    points to a bool variable, this function tries to break local
6907    optimal fixed-point by replacing candidates in IVS if it's true.  */
6908
6909 static bool
6910 try_improve_iv_set (struct ivopts_data *data,
6911                     struct iv_ca *ivs, bool *try_replace_p)
6912 {
6913   unsigned i, n_ivs;
6914   comp_cost acost, best_cost = iv_ca_cost (ivs);
6915   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6916   struct iv_cand *cand;
6917
6918   /* Try extending the set of induction variables by one.  */
6919   for (i = 0; i < data->vcands.length (); i++)
6920     {
6921       cand = data->vcands[i];
6922
6923       if (iv_ca_cand_used_p (ivs, cand))
6924         continue;
6925
6926       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6927       if (!act_delta)
6928         continue;
6929
6930       /* If we successfully added the candidate and the set is small enough,
6931          try optimizing it by removing other candidates.  */
6932       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6933         {
6934           iv_ca_delta_commit (data, ivs, act_delta, true);
6935           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6936           iv_ca_delta_commit (data, ivs, act_delta, false);
6937           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6938         }
6939
6940       if (acost < best_cost)
6941         {
6942           best_cost = acost;
6943           iv_ca_delta_free (&best_delta);
6944           best_delta = act_delta;
6945         }
6946       else
6947         iv_ca_delta_free (&act_delta);
6948     }
6949
6950   if (!best_delta)
6951     {
6952       /* Try removing the candidates from the set instead.  */
6953       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6954
6955       if (!best_delta && *try_replace_p)
6956         {
6957           *try_replace_p = false;
6958           /* So far candidate selecting algorithm tends to choose fewer IVs
6959              so that it can handle cases in which loops have many variables
6960              but the best choice is often to use only one general biv.  One
6961              weakness is it can't handle opposite cases, in which different
6962              candidates should be chosen with respect to each use.  To solve
6963              the problem, we replace candidates in a manner described by the
6964              comments of iv_ca_replace, thus give general algorithm a chance
6965              to break local optimal fixed-point in these cases.  */
6966           best_cost = iv_ca_replace (data, ivs, &best_delta);
6967         }
6968
6969       if (!best_delta)
6970         return false;
6971     }
6972
6973   iv_ca_delta_commit (data, ivs, best_delta, true);
6974   gcc_assert (best_cost == iv_ca_cost (ivs));
6975   iv_ca_delta_free (&best_delta);
6976   return true;
6977 }
6978
6979 /* Attempts to find the optimal set of induction variables.  We do simple
6980    greedy heuristic -- we try to replace at most one candidate in the selected
6981    solution and remove the unused ivs while this improves the cost.  */
6982
6983 static struct iv_ca *
6984 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6985 {
6986   struct iv_ca *set;
6987   bool try_replace_p = true;
6988
6989   /* Get the initial solution.  */
6990   set = get_initial_solution (data, originalp);
6991   if (!set)
6992     {
6993       if (dump_file && (dump_flags & TDF_DETAILS))
6994         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6995       return NULL;
6996     }
6997
6998   if (dump_file && (dump_flags & TDF_DETAILS))
6999     {
7000       fprintf (dump_file, "Initial set of candidates:\n");
7001       iv_ca_dump (data, dump_file, set);
7002     }
7003
7004   while (try_improve_iv_set (data, set, &try_replace_p))
7005     {
7006       if (dump_file && (dump_flags & TDF_DETAILS))
7007         {
7008           fprintf (dump_file, "Improved to:\n");
7009           iv_ca_dump (data, dump_file, set);
7010         }
7011     }
7012
7013   return set;
7014 }
7015
7016 static struct iv_ca *
7017 find_optimal_iv_set (struct ivopts_data *data)
7018 {
7019   unsigned i;
7020   comp_cost cost, origcost;
7021   struct iv_ca *set, *origset;
7022
7023   /* Determine the cost based on a strategy that starts with original IVs,
7024      and try again using a strategy that prefers candidates not based
7025      on any IVs.  */
7026   origset = find_optimal_iv_set_1 (data, true);
7027   set = find_optimal_iv_set_1 (data, false);
7028
7029   if (!origset && !set)
7030     return NULL;
7031
7032   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7033   cost = set ? iv_ca_cost (set) : infinite_cost;
7034
7035   if (dump_file && (dump_flags & TDF_DETAILS))
7036     {
7037       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
7038                origcost.cost, origcost.complexity);
7039       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
7040                cost.cost, cost.complexity);
7041     }
7042
7043   /* Choose the one with the best cost.  */
7044   if (origcost <= cost)
7045     {
7046       if (set)
7047         iv_ca_free (&set);
7048       set = origset;
7049     }
7050   else if (origset)
7051     iv_ca_free (&origset);
7052
7053   for (i = 0; i < data->vgroups.length (); i++)
7054     {
7055       struct iv_group *group = data->vgroups[i];
7056       group->selected = iv_ca_cand_for_group (set, group)->cand;
7057     }
7058
7059   return set;
7060 }
7061
7062 /* Creates a new induction variable corresponding to CAND.  */
7063
7064 static void
7065 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7066 {
7067   gimple_stmt_iterator incr_pos;
7068   tree base;
7069   struct iv_use *use;
7070   struct iv_group *group;
7071   bool after = false;
7072
7073   gcc_assert (cand->iv != NULL);
7074
7075   switch (cand->pos)
7076     {
7077     case IP_NORMAL:
7078       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7079       break;
7080
7081     case IP_END:
7082       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7083       after = true;
7084       break;
7085
7086     case IP_AFTER_USE:
7087       after = true;
7088       /* fall through */
7089     case IP_BEFORE_USE:
7090       incr_pos = gsi_for_stmt (cand->incremented_at);
7091       break;
7092
7093     case IP_ORIGINAL:
7094       /* Mark that the iv is preserved.  */
7095       name_info (data, cand->var_before)->preserve_biv = true;
7096       name_info (data, cand->var_after)->preserve_biv = true;
7097
7098       /* Rewrite the increment so that it uses var_before directly.  */
7099       use = find_interesting_uses_op (data, cand->var_after);
7100       group = data->vgroups[use->group_id];
7101       group->selected = cand;
7102       return;
7103     }
7104
7105   gimple_add_tmp_var (cand->var_before);
7106
7107   base = unshare_expr (cand->iv->base);
7108
7109   create_iv (base, unshare_expr (cand->iv->step),
7110              cand->var_before, data->current_loop,
7111              &incr_pos, after, &cand->var_before, &cand->var_after);
7112 }
7113
7114 /* Creates new induction variables described in SET.  */
7115
7116 static void
7117 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7118 {
7119   unsigned i;
7120   struct iv_cand *cand;
7121   bitmap_iterator bi;
7122
7123   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7124     {
7125       cand = data->vcands[i];
7126       create_new_iv (data, cand);
7127     }
7128
7129   if (dump_file && (dump_flags & TDF_DETAILS))
7130     {
7131       fprintf (dump_file, "Selected IV set for loop %d",
7132                data->current_loop->num);
7133       if (data->loop_loc != UNKNOWN_LOCATION)
7134         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7135                  LOCATION_LINE (data->loop_loc));
7136       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7137                avg_loop_niter (data->current_loop));
7138       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7139       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7140         {
7141           cand = data->vcands[i];
7142           dump_cand (dump_file, cand);
7143         }
7144       fprintf (dump_file, "\n");
7145     }
7146 }
7147
7148 /* Rewrites USE (definition of iv used in a nonlinear expression)
7149    using candidate CAND.  */
7150
7151 static void
7152 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7153                             struct iv_use *use, struct iv_cand *cand)
7154 {
7155   tree comp;
7156   tree tgt;
7157   gassign *ass;
7158   gimple_stmt_iterator bsi;
7159
7160   /* An important special case -- if we are asked to express value of
7161      the original iv by itself, just exit; there is no need to
7162      introduce a new computation (that might also need casting the
7163      variable to unsigned and back).  */
7164   if (cand->pos == IP_ORIGINAL
7165       && cand->incremented_at == use->stmt)
7166     {
7167       tree op = NULL_TREE;
7168       enum tree_code stmt_code;
7169
7170       gcc_assert (is_gimple_assign (use->stmt));
7171       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7172
7173       /* Check whether we may leave the computation unchanged.
7174          This is the case only if it does not rely on other
7175          computations in the loop -- otherwise, the computation
7176          we rely upon may be removed in remove_unused_ivs,
7177          thus leading to ICE.  */
7178       stmt_code = gimple_assign_rhs_code (use->stmt);
7179       if (stmt_code == PLUS_EXPR
7180           || stmt_code == MINUS_EXPR
7181           || stmt_code == POINTER_PLUS_EXPR)
7182         {
7183           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7184             op = gimple_assign_rhs2 (use->stmt);
7185           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7186             op = gimple_assign_rhs1 (use->stmt);
7187         }
7188
7189       if (op != NULL_TREE)
7190         {
7191           if (expr_invariant_in_loop_p (data->current_loop, op))
7192             return;
7193           if (TREE_CODE (op) == SSA_NAME)
7194             {
7195               struct iv *iv = get_iv (data, op);
7196               if (iv != NULL && integer_zerop (iv->step))
7197                 return;
7198             }
7199         }
7200     }
7201
7202   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7203   gcc_assert (comp != NULL_TREE);
7204
7205   switch (gimple_code (use->stmt))
7206     {
7207     case GIMPLE_PHI:
7208       tgt = PHI_RESULT (use->stmt);
7209
7210       /* If we should keep the biv, do not replace it.  */
7211       if (name_info (data, tgt)->preserve_biv)
7212         return;
7213
7214       bsi = gsi_after_labels (gimple_bb (use->stmt));
7215       break;
7216
7217     case GIMPLE_ASSIGN:
7218       tgt = gimple_assign_lhs (use->stmt);
7219       bsi = gsi_for_stmt (use->stmt);
7220       break;
7221
7222     default:
7223       gcc_unreachable ();
7224     }
7225
7226   if (!valid_gimple_rhs_p (comp)
7227       || (gimple_code (use->stmt) != GIMPLE_PHI
7228           /* We can't allow re-allocating the stmt as it might be pointed
7229              to still.  */
7230           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7231               >= gimple_num_ops (gsi_stmt (bsi)))))
7232     {
7233       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7234                                        true, GSI_SAME_STMT);
7235       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7236         {
7237           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7238           /* As this isn't a plain copy we have to reset alignment
7239              information.  */
7240           if (SSA_NAME_PTR_INFO (comp))
7241             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7242         }
7243     }
7244
7245   if (gimple_code (use->stmt) == GIMPLE_PHI)
7246     {
7247       ass = gimple_build_assign (tgt, comp);
7248       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7249
7250       bsi = gsi_for_stmt (use->stmt);
7251       remove_phi_node (&bsi, false);
7252     }
7253   else
7254     {
7255       gimple_assign_set_rhs_from_tree (&bsi, comp);
7256       use->stmt = gsi_stmt (bsi);
7257     }
7258 }
7259
7260 /* Performs a peephole optimization to reorder the iv update statement with
7261    a mem ref to enable instruction combining in later phases. The mem ref uses
7262    the iv value before the update, so the reordering transformation requires
7263    adjustment of the offset. CAND is the selected IV_CAND.
7264
7265    Example:
7266
7267    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7268    iv2 = iv1 + 1;
7269
7270    if (t < val)      (1)
7271      goto L;
7272    goto Head;
7273
7274
7275    directly propagating t over to (1) will introduce overlapping live range
7276    thus increase register pressure. This peephole transform it into:
7277
7278
7279    iv2 = iv1 + 1;
7280    t = MEM_REF (base, iv2, 8, 8);
7281    if (t < val)
7282      goto L;
7283    goto Head;
7284 */
7285
7286 static void
7287 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7288 {
7289   tree var_after;
7290   gimple *iv_update, *stmt;
7291   basic_block bb;
7292   gimple_stmt_iterator gsi, gsi_iv;
7293
7294   if (cand->pos != IP_NORMAL)
7295     return;
7296
7297   var_after = cand->var_after;
7298   iv_update = SSA_NAME_DEF_STMT (var_after);
7299
7300   bb = gimple_bb (iv_update);
7301   gsi = gsi_last_nondebug_bb (bb);
7302   stmt = gsi_stmt (gsi);
7303
7304   /* Only handle conditional statement for now.  */
7305   if (gimple_code (stmt) != GIMPLE_COND)
7306     return;
7307
7308   gsi_prev_nondebug (&gsi);
7309   stmt = gsi_stmt (gsi);
7310   if (stmt != iv_update)
7311     return;
7312
7313   gsi_prev_nondebug (&gsi);
7314   if (gsi_end_p (gsi))
7315     return;
7316
7317   stmt = gsi_stmt (gsi);
7318   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7319     return;
7320
7321   if (stmt != use->stmt)
7322     return;
7323
7324   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7325     return;
7326
7327   if (dump_file && (dump_flags & TDF_DETAILS))
7328     {
7329       fprintf (dump_file, "Reordering \n");
7330       print_gimple_stmt (dump_file, iv_update, 0, 0);
7331       print_gimple_stmt (dump_file, use->stmt, 0, 0);
7332       fprintf (dump_file, "\n");
7333     }
7334
7335   gsi = gsi_for_stmt (use->stmt);
7336   gsi_iv = gsi_for_stmt (iv_update);
7337   gsi_move_before (&gsi_iv, &gsi);
7338
7339   cand->pos = IP_BEFORE_USE;
7340   cand->incremented_at = use->stmt;
7341 }
7342
7343 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7344
7345 static void
7346 rewrite_use_address (struct ivopts_data *data,
7347                      struct iv_use *use, struct iv_cand *cand)
7348 {
7349   aff_tree aff;
7350   bool ok;
7351
7352   adjust_iv_update_pos (cand, use);
7353   ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7354   gcc_assert (ok);
7355   unshare_aff_combination (&aff);
7356
7357   /* To avoid undefined overflow problems, all IV candidates use unsigned
7358      integer types.  The drawback is that this makes it impossible for
7359      create_mem_ref to distinguish an IV that is based on a memory object
7360      from one that represents simply an offset.
7361
7362      To work around this problem, we pass a hint to create_mem_ref that
7363      indicates which variable (if any) in aff is an IV based on a memory
7364      object.  Note that we only consider the candidate.  If this is not
7365      based on an object, the base of the reference is in some subexpression
7366      of the use -- but these will use pointer types, so they are recognized
7367      by the create_mem_ref heuristics anyway.  */
7368   tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7369   tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7370   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7371   tree type = TREE_TYPE (*use->op_p);
7372   unsigned int align = get_object_alignment (*use->op_p);
7373   if (align != TYPE_ALIGN (type))
7374     type = build_aligned_type (type, align);
7375
7376   tree ref = create_mem_ref (&bsi, type, &aff,
7377                              reference_alias_ptr_type (*use->op_p),
7378                              iv, base_hint, data->speed);
7379
7380   copy_ref_info (ref, *use->op_p);
7381   *use->op_p = ref;
7382 }
7383
7384 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7385    candidate CAND.  */
7386
7387 static void
7388 rewrite_use_compare (struct ivopts_data *data,
7389                      struct iv_use *use, struct iv_cand *cand)
7390 {
7391   tree comp, *var_p, op, bound;
7392   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7393   enum tree_code compare;
7394   struct iv_group *group = data->vgroups[use->group_id];
7395   struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7396   bool ok;
7397
7398   bound = cp->value;
7399   if (bound)
7400     {
7401       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7402       tree var_type = TREE_TYPE (var);
7403       gimple_seq stmts;
7404
7405       if (dump_file && (dump_flags & TDF_DETAILS))
7406         {
7407           fprintf (dump_file, "Replacing exit test: ");
7408           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7409         }
7410       compare = cp->comp;
7411       bound = unshare_expr (fold_convert (var_type, bound));
7412       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7413       if (stmts)
7414         gsi_insert_seq_on_edge_immediate (
7415                 loop_preheader_edge (data->current_loop),
7416                 stmts);
7417
7418       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7419       gimple_cond_set_lhs (cond_stmt, var);
7420       gimple_cond_set_code (cond_stmt, compare);
7421       gimple_cond_set_rhs (cond_stmt, op);
7422       return;
7423     }
7424
7425   /* The induction variable elimination failed; just express the original
7426      giv.  */
7427   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7428   gcc_assert (comp != NULL_TREE);
7429
7430   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7431   gcc_assert (ok);
7432
7433   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7434                                      true, GSI_SAME_STMT);
7435 }
7436
7437 /* Rewrite the groups using the selected induction variables.  */
7438
7439 static void
7440 rewrite_groups (struct ivopts_data *data)
7441 {
7442   unsigned i, j;
7443
7444   for (i = 0; i < data->vgroups.length (); i++)
7445     {
7446       struct iv_group *group = data->vgroups[i];
7447       struct iv_cand *cand = group->selected;
7448
7449       gcc_assert (cand);
7450
7451       if (group->type == USE_NONLINEAR_EXPR)
7452         {
7453           for (j = 0; j < group->vuses.length (); j++)
7454             {
7455               rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7456               update_stmt (group->vuses[j]->stmt);
7457             }
7458         }
7459       else if (group->type == USE_ADDRESS)
7460         {
7461           for (j = 0; j < group->vuses.length (); j++)
7462             {
7463               rewrite_use_address (data, group->vuses[j], cand);
7464               update_stmt (group->vuses[j]->stmt);
7465             }
7466         }
7467       else
7468         {
7469           gcc_assert (group->type == USE_COMPARE);
7470
7471           for (j = 0; j < group->vuses.length (); j++)
7472             {
7473               rewrite_use_compare (data, group->vuses[j], cand);
7474               update_stmt (group->vuses[j]->stmt);
7475             }
7476         }
7477     }
7478 }
7479
7480 /* Removes the ivs that are not used after rewriting.  */
7481
7482 static void
7483 remove_unused_ivs (struct ivopts_data *data)
7484 {
7485   unsigned j;
7486   bitmap_iterator bi;
7487   bitmap toremove = BITMAP_ALLOC (NULL);
7488
7489   /* Figure out an order in which to release SSA DEFs so that we don't
7490      release something that we'd have to propagate into a debug stmt
7491      afterwards.  */
7492   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7493     {
7494       struct version_info *info;
7495
7496       info = ver_info (data, j);
7497       if (info->iv
7498           && !integer_zerop (info->iv->step)
7499           && !info->inv_id
7500           && !info->iv->nonlin_use
7501           && !info->preserve_biv)
7502         {
7503           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7504
7505           tree def = info->iv->ssa_name;
7506
7507           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7508             {
7509               imm_use_iterator imm_iter;
7510               use_operand_p use_p;
7511               gimple *stmt;
7512               int count = 0;
7513
7514               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7515                 {
7516                   if (!gimple_debug_bind_p (stmt))
7517                     continue;
7518
7519                   /* We just want to determine whether to do nothing
7520                      (count == 0), to substitute the computed
7521                      expression into a single use of the SSA DEF by
7522                      itself (count == 1), or to use a debug temp
7523                      because the SSA DEF is used multiple times or as
7524                      part of a larger expression (count > 1). */
7525                   count++;
7526                   if (gimple_debug_bind_get_value (stmt) != def)
7527                     count++;
7528
7529                   if (count > 1)
7530                     BREAK_FROM_IMM_USE_STMT (imm_iter);
7531                 }
7532
7533               if (!count)
7534                 continue;
7535
7536               struct iv_use dummy_use;
7537               struct iv_cand *best_cand = NULL, *cand;
7538               unsigned i, best_pref = 0, cand_pref;
7539
7540               memset (&dummy_use, 0, sizeof (dummy_use));
7541               dummy_use.iv = info->iv;
7542               for (i = 0; i < data->vgroups.length () && i < 64; i++)
7543                 {
7544                   cand = data->vgroups[i]->selected;
7545                   if (cand == best_cand)
7546                     continue;
7547                   cand_pref = operand_equal_p (cand->iv->step,
7548                                                info->iv->step, 0)
7549                     ? 4 : 0;
7550                   cand_pref
7551                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7552                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7553                     ? 2 : 0;
7554                   cand_pref
7555                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7556                     ? 1 : 0;
7557                   if (best_cand == NULL || best_pref < cand_pref)
7558                     {
7559                       best_cand = cand;
7560                       best_pref = cand_pref;
7561                     }
7562                 }
7563
7564               if (!best_cand)
7565                 continue;
7566
7567               tree comp = get_computation_at (data->current_loop,
7568                                               SSA_NAME_DEF_STMT (def),
7569                                               &dummy_use, best_cand);
7570               if (!comp)
7571                 continue;
7572
7573               if (count > 1)
7574                 {
7575                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7576                   DECL_ARTIFICIAL (vexpr) = 1;
7577                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7578                   if (SSA_NAME_VAR (def))
7579                     SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7580                   else
7581                     SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7582                   gdebug *def_temp
7583                     = gimple_build_debug_bind (vexpr, comp, NULL);
7584                   gimple_stmt_iterator gsi;
7585
7586                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7587                     gsi = gsi_after_labels (gimple_bb
7588                                             (SSA_NAME_DEF_STMT (def)));
7589                   else
7590                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7591
7592                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7593                   comp = vexpr;
7594                 }
7595
7596               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7597                 {
7598                   if (!gimple_debug_bind_p (stmt))
7599                     continue;
7600
7601                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7602                     SET_USE (use_p, comp);
7603
7604                   update_stmt (stmt);
7605                 }
7606             }
7607         }
7608     }
7609
7610   release_defs_bitset (toremove);
7611
7612   BITMAP_FREE (toremove);
7613 }
7614
7615 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7616    for hash_map::traverse.  */
7617
7618 bool
7619 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7620 {
7621   free (value);
7622   return true;
7623 }
7624
7625 /* Frees data allocated by the optimization of a single loop.  */
7626
7627 static void
7628 free_loop_data (struct ivopts_data *data)
7629 {
7630   unsigned i, j;
7631   bitmap_iterator bi;
7632   tree obj;
7633
7634   if (data->niters)
7635     {
7636       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7637       delete data->niters;
7638       data->niters = NULL;
7639     }
7640
7641   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7642     {
7643       struct version_info *info;
7644
7645       info = ver_info (data, i);
7646       info->iv = NULL;
7647       info->has_nonlin_use = false;
7648       info->preserve_biv = false;
7649       info->inv_id = 0;
7650     }
7651   bitmap_clear (data->relevant);
7652   bitmap_clear (data->important_candidates);
7653
7654   for (i = 0; i < data->vgroups.length (); i++)
7655     {
7656       struct iv_group *group = data->vgroups[i];
7657
7658       for (j = 0; j < group->vuses.length (); j++)
7659         free (group->vuses[j]);
7660       group->vuses.release ();
7661
7662       BITMAP_FREE (group->related_cands);
7663       for (j = 0; j < group->n_map_members; j++)
7664         {
7665           if (group->cost_map[j].inv_vars)
7666             BITMAP_FREE (group->cost_map[j].inv_vars);
7667           if (group->cost_map[j].inv_exprs)
7668             BITMAP_FREE (group->cost_map[j].inv_exprs);
7669         }
7670
7671       free (group->cost_map);
7672       free (group);
7673     }
7674   data->vgroups.truncate (0);
7675
7676   for (i = 0; i < data->vcands.length (); i++)
7677     {
7678       struct iv_cand *cand = data->vcands[i];
7679
7680       if (cand->inv_vars)
7681         BITMAP_FREE (cand->inv_vars);
7682       free (cand);
7683     }
7684   data->vcands.truncate (0);
7685
7686   if (data->version_info_size < num_ssa_names)
7687     {
7688       data->version_info_size = 2 * num_ssa_names;
7689       free (data->version_info);
7690       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7691     }
7692
7693   data->max_inv_var_id = 0;
7694   data->max_inv_expr_id = 0;
7695
7696   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7697     SET_DECL_RTL (obj, NULL_RTX);
7698
7699   decl_rtl_to_reset.truncate (0);
7700
7701   data->inv_expr_tab->empty ();
7702
7703   data->iv_common_cand_tab->empty ();
7704   data->iv_common_cands.truncate (0);
7705 }
7706
7707 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7708    loop tree.  */
7709
7710 static void
7711 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7712 {
7713   free_loop_data (data);
7714   free (data->version_info);
7715   BITMAP_FREE (data->relevant);
7716   BITMAP_FREE (data->important_candidates);
7717
7718   decl_rtl_to_reset.release ();
7719   data->vgroups.release ();
7720   data->vcands.release ();
7721   delete data->inv_expr_tab;
7722   data->inv_expr_tab = NULL;
7723   free_affine_expand_cache (&data->name_expansion_cache);
7724   delete data->iv_common_cand_tab;
7725   data->iv_common_cand_tab = NULL;
7726   data->iv_common_cands.release ();
7727   obstack_free (&data->iv_obstack, NULL);
7728 }
7729
7730 /* Returns true if the loop body BODY includes any function calls.  */
7731
7732 static bool
7733 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7734 {
7735   gimple_stmt_iterator gsi;
7736   unsigned i;
7737
7738   for (i = 0; i < num_nodes; i++)
7739     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7740       {
7741         gimple *stmt = gsi_stmt (gsi);
7742         if (is_gimple_call (stmt)
7743             && !gimple_call_internal_p (stmt)
7744             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7745           return true;
7746       }
7747   return false;
7748 }
7749
7750 /* Optimizes the LOOP.  Returns true if anything changed.  */
7751
7752 static bool
7753 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7754 {
7755   bool changed = false;
7756   struct iv_ca *iv_ca;
7757   edge exit = single_dom_exit (loop);
7758   basic_block *body;
7759
7760   gcc_assert (!data->niters);
7761   data->current_loop = loop;
7762   data->loop_loc = find_loop_location (loop);
7763   data->speed = optimize_loop_for_speed_p (loop);
7764
7765   if (dump_file && (dump_flags & TDF_DETAILS))
7766     {
7767       fprintf (dump_file, "Processing loop %d", loop->num);
7768       if (data->loop_loc != UNKNOWN_LOCATION)
7769         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7770                  LOCATION_LINE (data->loop_loc));
7771       fprintf (dump_file, "\n");
7772
7773       if (exit)
7774         {
7775           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7776                    exit->src->index, exit->dest->index);
7777           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7778           fprintf (dump_file, "\n");
7779         }
7780
7781       fprintf (dump_file, "\n");
7782     }
7783
7784   body = get_loop_body (loop);
7785   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7786   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7787   free (body);
7788
7789   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7790
7791   /* For each ssa name determines whether it behaves as an induction variable
7792      in some loop.  */
7793   if (!find_induction_variables (data))
7794     goto finish;
7795
7796   /* Finds interesting uses (item 1).  */
7797   find_interesting_uses (data);
7798   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7799     goto finish;
7800
7801   /* Finds candidates for the induction variables (item 2).  */
7802   find_iv_candidates (data);
7803
7804   /* Calculates the costs (item 3, part 1).  */
7805   determine_iv_costs (data);
7806   determine_group_iv_costs (data);
7807   determine_set_costs (data);
7808
7809   /* Find the optimal set of induction variables (item 3, part 2).  */
7810   iv_ca = find_optimal_iv_set (data);
7811   if (!iv_ca)
7812     goto finish;
7813   changed = true;
7814
7815   /* Create the new induction variables (item 4, part 1).  */
7816   create_new_ivs (data, iv_ca);
7817   iv_ca_free (&iv_ca);
7818
7819   /* Rewrite the uses (item 4, part 2).  */
7820   rewrite_groups (data);
7821
7822   /* Remove the ivs that are unused after rewriting.  */
7823   remove_unused_ivs (data);
7824
7825   /* We have changed the structure of induction variables; it might happen
7826      that definitions in the scev database refer to some of them that were
7827      eliminated.  */
7828   scev_reset ();
7829
7830 finish:
7831   free_loop_data (data);
7832
7833   return changed;
7834 }
7835
7836 /* Main entry point.  Optimizes induction variables in loops.  */
7837
7838 void
7839 tree_ssa_iv_optimize (void)
7840 {
7841   struct loop *loop;
7842   struct ivopts_data data;
7843
7844   tree_ssa_iv_optimize_init (&data);
7845
7846   /* Optimize the loops starting with the innermost ones.  */
7847   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7848     {
7849       if (dump_file && (dump_flags & TDF_DETAILS))
7850         flow_loop_dump (loop, dump_file, NULL, 1);
7851
7852       tree_ssa_iv_optimize_loop (&data, loop);
7853     }
7854
7855   tree_ssa_iv_optimize_finalize (&data);
7856 }