gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2014 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "tree.h"
  69 #include "stor-layout.h"
  70 #include "tm_p.h"
  71 #include "basic-block.h"
  72 #include "gimple-pretty-print.h"
  73 #include "pointer-set.h"
  74 #include "hash-table.h"
  75 #include "tree-ssa-alias.h"
  76 #include "internal-fn.h"
  77 #include "tree-eh.h"
  78 #include "gimple-expr.h"
  79 #include "is-a.h"
  80 #include "gimple.h"
  81 #include "gimplify.h"
  82 #include "gimple-iterator.h"
  83 #include "gimplify-me.h"
  84 #include "gimple-ssa.h"
  85 #include "cgraph.h"
  86 #include "tree-cfg.h"
  87 #include "tree-phinodes.h"
  88 #include "ssa-iterators.h"
  89 #include "stringpool.h"
  90 #include "tree-ssanames.h"
  91 #include "tree-ssa-loop-ivopts.h"
  92 #include "tree-ssa-loop-manip.h"
  93 #include "tree-ssa-loop-niter.h"
  94 #include "tree-ssa-loop.h"
  95 #include "expr.h"
  96 #include "tree-dfa.h"
  97 #include "tree-ssa.h"
  98 #include "cfgloop.h"
  99 #include "tree-pass.h"
 100 #include "insn-config.h"
 101 #include "tree-chrec.h"
 102 #include "tree-scalar-evolution.h"
 103 #include "cfgloop.h"
 104 #include "params.h"
 105 #include "langhooks.h"
 106 #include "tree-affine.h"
 107 #include "target.h"
 108 #include "tree-inline.h"
 109 #include "tree-ssa-propagate.h"
 110 #include "expmed.h"
 111 #include "tree-ssa-address.h"
 112
 113 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 114    cost of different addressing modes.  This should be moved to a TBD
 115    interface between the GIMPLE and RTL worlds.  */
 116 #include "expr.h"
 117 #include "recog.h"
 118
 119 /* The infinite cost.  */
 120 #define INFTY 10000000
 121
 122 #define AVG_LOOP_NITER(LOOP) 5
 123
 124 /* Returns the expected number of loop iterations for LOOP.
 125    The average trip count is computed from profile data if it
 126    exists. */
 127
 128 static inline HOST_WIDE_INT
 129 avg_loop_niter (struct loop *loop)
 130 {
 131   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 132   if (niter == -1)
 133     return AVG_LOOP_NITER (loop);
 134
 135   return niter;
 136 }
 137
 138 /* Representation of the induction variable.  */
 139 struct iv
 140 {
 141   tree base;            /* Initial value of the iv.  */
 142   tree base_object;     /* A memory object to that the induction variable points.  */
 143   tree step;            /* Step of the iv (constant only).  */
 144   tree ssa_name;        /* The ssa name with the value.  */
 145   bool biv_p;           /* Is it a biv?  */
 146   bool have_use_for;    /* Do we already have a use for it?  */
 147   unsigned use_id;      /* The identifier in the use if it is the case.  */
 148 };
 149
 150 /* Per-ssa version information (induction variable descriptions, etc.).  */
 151 struct version_info
 152 {
 153   tree name;            /* The ssa name.  */
 154   struct iv *iv;        /* Induction variable description.  */
 155   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 156                            an expression that is not an induction variable.  */
 157   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 158   unsigned inv_id;      /* Id of an invariant.  */
 159 };
 160
 161 /* Types of uses.  */
 162 enum use_type
 163 {
 164   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 165   USE_ADDRESS,          /* Use in an address.  */
 166   USE_COMPARE           /* Use is a compare.  */
 167 };
 168
 169 /* Cost of a computation.  */
 170 typedef struct
 171 {
 172   int cost;             /* The runtime cost.  */
 173   unsigned complexity;  /* The estimate of the complexity of the code for
 174                            the computation (in no concrete units --
 175                            complexity field should be larger for more
 176                            complex expressions and addressing modes).  */
 177 } comp_cost;
 178
 179 static const comp_cost no_cost = {0, 0};
 180 static const comp_cost infinite_cost = {INFTY, INFTY};
 181
 182 /* The candidate - cost pair.  */
 183 struct cost_pair
 184 {
 185   struct iv_cand *cand; /* The candidate.  */
 186   comp_cost cost;       /* The cost.  */
 187   bitmap depends_on;    /* The list of invariants that have to be
 188                            preserved.  */
 189   tree value;           /* For final value elimination, the expression for
 190                            the final value of the iv.  For iv elimination,
 191                            the new bound to compare with.  */
 192   enum tree_code comp;  /* For iv elimination, the comparison.  */
 193   int inv_expr_id;      /* Loop invariant expression id.  */
 194 };
 195
 196 /* Use.  */
 197 struct iv_use
 198 {
 199   unsigned id;          /* The id of the use.  */
 200   enum use_type type;   /* Type of the use.  */
 201   struct iv *iv;        /* The induction variable it is based on.  */
 202   gimple stmt;          /* Statement in that it occurs.  */
 203   tree *op_p;           /* The place where it occurs.  */
 204   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 205                            important ones.  */
 206
 207   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 208   struct cost_pair *cost_map;
 209                         /* The costs wrto the iv candidates.  */
 210
 211   struct iv_cand *selected;
 212                         /* The selected candidate.  */
 213 };
 214
 215 /* The position where the iv is computed.  */
 216 enum iv_position
 217 {
 218   IP_NORMAL,            /* At the end, just before the exit condition.  */
 219   IP_END,               /* At the end of the latch block.  */
 220   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 221   IP_AFTER_USE,         /* Immediately after a specific use.  */
 222   IP_ORIGINAL           /* The original biv.  */
 223 };
 224
 225 /* The induction variable candidate.  */
 226 struct iv_cand
 227 {
 228   unsigned id;          /* The number of the candidate.  */
 229   bool important;       /* Whether this is an "important" candidate, i.e. such
 230                            that it should be considered by all uses.  */
 231   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 232   gimple incremented_at;/* For original biv, the statement where it is
 233                            incremented.  */
 234   tree var_before;      /* The variable used for it before increment.  */
 235   tree var_after;       /* The variable used for it after increment.  */
 236   struct iv *iv;        /* The value of the candidate.  NULL for
 237                            "pseudocandidate" used to indicate the possibility
 238                            to replace the final value of an iv by direct
 239                            computation of the value.  */
 240   unsigned cost;        /* Cost of the candidate.  */
 241   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 242   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 243                               where it is incremented.  */
 244   bitmap depends_on;    /* The list of invariants that are used in step of the
 245                            biv.  */
 246 };
 247
 248 /* Loop invariant expression hashtable entry.  */
 249 struct iv_inv_expr_ent
 250 {
 251   tree expr;
 252   int id;
 253   hashval_t hash;
 254 };
 255
 256 /* The data used by the induction variable optimizations.  */
 257
 258 typedef struct iv_use *iv_use_p;
 259
 260 typedef struct iv_cand *iv_cand_p;
 261
 262 /* Hashtable helpers.  */
 263
 264 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
 265 {
 266   typedef iv_inv_expr_ent value_type;
 267   typedef iv_inv_expr_ent compare_type;
 268   static inline hashval_t hash (const value_type *);
 269   static inline bool equal (const value_type *, const compare_type *);
 270 };
 271
 272 /* Hash function for loop invariant expressions.  */
 273
 274 inline hashval_t
 275 iv_inv_expr_hasher::hash (const value_type *expr)
 276 {
 277   return expr->hash;
 278 }
 279
 280 /* Hash table equality function for expressions.  */
 281
 282 inline bool
 283 iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
 284 {
 285   return expr1->hash == expr2->hash
 286          && operand_equal_p (expr1->expr, expr2->expr, 0);
 287 }
 288
 289 struct ivopts_data
 290 {
 291   /* The currently optimized loop.  */
 292   struct loop *current_loop;
 293
 294   /* Numbers of iterations for all exits of the current loop.  */
 295   struct pointer_map_t *niters;
 296
 297   /* Number of registers used in it.  */
 298   unsigned regs_used;
 299
 300   /* The size of version_info array allocated.  */
 301   unsigned version_info_size;
 302
 303   /* The array of information for the ssa names.  */
 304   struct version_info *version_info;
 305
 306   /* The hashtable of loop invariant expressions created
 307      by ivopt.  */
 308   hash_table <iv_inv_expr_hasher> inv_expr_tab;
 309
 310   /* Loop invariant expression id.  */
 311   int inv_expr_id;
 312
 313   /* The bitmap of indices in version_info whose value was changed.  */
 314   bitmap relevant;
 315
 316   /* The uses of induction variables.  */
 317   vec<iv_use_p> iv_uses;
 318
 319   /* The candidates.  */
 320   vec<iv_cand_p> iv_candidates;
 321
 322   /* A bitmap of important candidates.  */
 323   bitmap important_candidates;
 324
 325   /* The maximum invariant id.  */
 326   unsigned max_inv_id;
 327
 328   /* Whether to consider just related and important candidates when replacing a
 329      use.  */
 330   bool consider_all_candidates;
 331
 332   /* Are we optimizing for speed?  */
 333   bool speed;
 334
 335   /* Whether the loop body includes any function calls.  */
 336   bool body_includes_call;
 337
 338   /* Whether the loop body can only be exited via single exit.  */
 339   bool loop_single_exit_p;
 340 };
 341
 342 /* An assignment of iv candidates to uses.  */
 343
 344 struct iv_ca
 345 {
 346   /* The number of uses covered by the assignment.  */
 347   unsigned upto;
 348
 349   /* Number of uses that cannot be expressed by the candidates in the set.  */
 350   unsigned bad_uses;
 351
 352   /* Candidate assigned to a use, together with the related costs.  */
 353   struct cost_pair **cand_for_use;
 354
 355   /* Number of times each candidate is used.  */
 356   unsigned *n_cand_uses;
 357
 358   /* The candidates used.  */
 359   bitmap cands;
 360
 361   /* The number of candidates in the set.  */
 362   unsigned n_cands;
 363
 364   /* Total number of registers needed.  */
 365   unsigned n_regs;
 366
 367   /* Total cost of expressing uses.  */
 368   comp_cost cand_use_cost;
 369
 370   /* Total cost of candidates.  */
 371   unsigned cand_cost;
 372
 373   /* Number of times each invariant is used.  */
 374   unsigned *n_invariant_uses;
 375
 376   /* The array holding the number of uses of each loop
 377      invariant expressions created by ivopt.  */
 378   unsigned *used_inv_expr;
 379
 380   /* The number of created loop invariants.  */
 381   unsigned num_used_inv_expr;
 382
 383   /* Total cost of the assignment.  */
 384   comp_cost cost;
 385 };
 386
 387 /* Difference of two iv candidate assignments.  */
 388
 389 struct iv_ca_delta
 390 {
 391   /* Changed use.  */
 392   struct iv_use *use;
 393
 394   /* An old assignment (for rollback purposes).  */
 395   struct cost_pair *old_cp;
 396
 397   /* A new assignment.  */
 398   struct cost_pair *new_cp;
 399
 400   /* Next change in the list.  */
 401   struct iv_ca_delta *next_change;
 402 };
 403
 404 /* Bound on number of candidates below that all candidates are considered.  */
 405
 406 #define CONSIDER_ALL_CANDIDATES_BOUND \
 407   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 408
 409 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 410    optimizing such a loop would help, and it would take ages).  */
 411
 412 #define MAX_CONSIDERED_USES \
 413   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 414
 415 /* If there are at most this number of ivs in the set, try removing unnecessary
 416    ivs from the set always.  */
 417
 418 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 419   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 420
 421 /* The list of trees for that the decl_rtl field must be reset is stored
 422    here.  */
 423
 424 static vec<tree> decl_rtl_to_reset;
 425
 426 static comp_cost force_expr_to_var_cost (tree, bool);
 427
 428 /* Number of uses recorded in DATA.  */
 429
 430 static inline unsigned
 431 n_iv_uses (struct ivopts_data *data)
 432 {
 433   return data->iv_uses.length ();
 434 }
 435
 436 /* Ith use recorded in DATA.  */
 437
 438 static inline struct iv_use *
 439 iv_use (struct ivopts_data *data, unsigned i)
 440 {
 441   return data->iv_uses[i];
 442 }
 443
 444 /* Number of candidates recorded in DATA.  */
 445
 446 static inline unsigned
 447 n_iv_cands (struct ivopts_data *data)
 448 {
 449   return data->iv_candidates.length ();
 450 }
 451
 452 /* Ith candidate recorded in DATA.  */
 453
 454 static inline struct iv_cand *
 455 iv_cand (struct ivopts_data *data, unsigned i)
 456 {
 457   return data->iv_candidates[i];
 458 }
 459
 460 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 461
 462 edge
 463 single_dom_exit (struct loop *loop)
 464 {
 465   edge exit = single_exit (loop);
 466
 467   if (!exit)
 468     return NULL;
 469
 470   if (!just_once_each_iteration_p (loop, exit->src))
 471     return NULL;
 472
 473   return exit;
 474 }
 475
 476 /* Dumps information about the induction variable IV to FILE.  */
 477
 478 void
 479 dump_iv (FILE *file, struct iv *iv)
 480 {
 481   if (iv->ssa_name)
 482     {
 483       fprintf (file, "ssa name ");
 484       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 485       fprintf (file, "\n");
 486     }
 487
 488   fprintf (file, "  type ");
 489   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 490   fprintf (file, "\n");
 491
 492   if (iv->step)
 493     {
 494       fprintf (file, "  base ");
 495       print_generic_expr (file, iv->base, TDF_SLIM);
 496       fprintf (file, "\n");
 497
 498       fprintf (file, "  step ");
 499       print_generic_expr (file, iv->step, TDF_SLIM);
 500       fprintf (file, "\n");
 501     }
 502   else
 503     {
 504       fprintf (file, "  invariant ");
 505       print_generic_expr (file, iv->base, TDF_SLIM);
 506       fprintf (file, "\n");
 507     }
 508
 509   if (iv->base_object)
 510     {
 511       fprintf (file, "  base object ");
 512       print_generic_expr (file, iv->base_object, TDF_SLIM);
 513       fprintf (file, "\n");
 514     }
 515
 516   if (iv->biv_p)
 517     fprintf (file, "  is a biv\n");
 518 }
 519
 520 /* Dumps information about the USE to FILE.  */
 521
 522 void
 523 dump_use (FILE *file, struct iv_use *use)
 524 {
 525   fprintf (file, "use %d\n", use->id);
 526
 527   switch (use->type)
 528     {
 529     case USE_NONLINEAR_EXPR:
 530       fprintf (file, "  generic\n");
 531       break;
 532
 533     case USE_ADDRESS:
 534       fprintf (file, "  address\n");
 535       break;
 536
 537     case USE_COMPARE:
 538       fprintf (file, "  compare\n");
 539       break;
 540
 541     default:
 542       gcc_unreachable ();
 543     }
 544
 545   fprintf (file, "  in statement ");
 546   print_gimple_stmt (file, use->stmt, 0, 0);
 547   fprintf (file, "\n");
 548
 549   fprintf (file, "  at position ");
 550   if (use->op_p)
 551     print_generic_expr (file, *use->op_p, TDF_SLIM);
 552   fprintf (file, "\n");
 553
 554   dump_iv (file, use->iv);
 555
 556   if (use->related_cands)
 557     {
 558       fprintf (file, "  related candidates ");
 559       dump_bitmap (file, use->related_cands);
 560     }
 561 }
 562
 563 /* Dumps information about the uses to FILE.  */
 564
 565 void
 566 dump_uses (FILE *file, struct ivopts_data *data)
 567 {
 568   unsigned i;
 569   struct iv_use *use;
 570
 571   for (i = 0; i < n_iv_uses (data); i++)
 572     {
 573       use = iv_use (data, i);
 574
 575       dump_use (file, use);
 576       fprintf (file, "\n");
 577     }
 578 }
 579
 580 /* Dumps information about induction variable candidate CAND to FILE.  */
 581
 582 void
 583 dump_cand (FILE *file, struct iv_cand *cand)
 584 {
 585   struct iv *iv = cand->iv;
 586
 587   fprintf (file, "candidate %d%s\n",
 588            cand->id, cand->important ? " (important)" : "");
 589
 590   if (cand->depends_on)
 591     {
 592       fprintf (file, "  depends on ");
 593       dump_bitmap (file, cand->depends_on);
 594     }
 595
 596   if (!iv)
 597     {
 598       fprintf (file, "  final value replacement\n");
 599       return;
 600     }
 601
 602   if (cand->var_before)
 603     {
 604       fprintf (file, "  var_before ");
 605       print_generic_expr (file, cand->var_before, TDF_SLIM);
 606       fprintf (file, "\n");
 607     }
 608   if (cand->var_after)
 609     {
 610       fprintf (file, "  var_after ");
 611       print_generic_expr (file, cand->var_after, TDF_SLIM);
 612       fprintf (file, "\n");
 613     }
 614
 615   switch (cand->pos)
 616     {
 617     case IP_NORMAL:
 618       fprintf (file, "  incremented before exit test\n");
 619       break;
 620
 621     case IP_BEFORE_USE:
 622       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 623       break;
 624
 625     case IP_AFTER_USE:
 626       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 627       break;
 628
 629     case IP_END:
 630       fprintf (file, "  incremented at end\n");
 631       break;
 632
 633     case IP_ORIGINAL:
 634       fprintf (file, "  original biv\n");
 635       break;
 636     }
 637
 638   dump_iv (file, iv);
 639 }
 640
 641 /* Returns the info for ssa version VER.  */
 642
 643 static inline struct version_info *
 644 ver_info (struct ivopts_data *data, unsigned ver)
 645 {
 646   return data->version_info + ver;
 647 }
 648
 649 /* Returns the info for ssa name NAME.  */
 650
 651 static inline struct version_info *
 652 name_info (struct ivopts_data *data, tree name)
 653 {
 654   return ver_info (data, SSA_NAME_VERSION (name));
 655 }
 656
 657 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 658    emitted in LOOP.  */
 659
 660 static bool
 661 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 662 {
 663   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 664
 665   gcc_assert (bb);
 666
 667   if (sbb == loop->latch)
 668     return true;
 669
 670   if (sbb != bb)
 671     return false;
 672
 673   return stmt == last_stmt (bb);
 674 }
 675
 676 /* Returns true if STMT if after the place where the original induction
 677    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 678    if the positions are identical.  */
 679
 680 static bool
 681 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 682 {
 683   basic_block cand_bb = gimple_bb (cand->incremented_at);
 684   basic_block stmt_bb = gimple_bb (stmt);
 685
 686   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 687     return false;
 688
 689   if (stmt_bb != cand_bb)
 690     return true;
 691
 692   if (true_if_equal
 693       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 694     return true;
 695   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 696 }
 697
 698 /* Returns true if STMT if after the place where the induction variable
 699    CAND is incremented in LOOP.  */
 700
 701 static bool
 702 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 703 {
 704   switch (cand->pos)
 705     {
 706     case IP_END:
 707       return false;
 708
 709     case IP_NORMAL:
 710       return stmt_after_ip_normal_pos (loop, stmt);
 711
 712     case IP_ORIGINAL:
 713     case IP_AFTER_USE:
 714       return stmt_after_inc_pos (cand, stmt, false);
 715
 716     case IP_BEFORE_USE:
 717       return stmt_after_inc_pos (cand, stmt, true);
 718
 719     default:
 720       gcc_unreachable ();
 721     }
 722 }
 723
 724 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 725
 726 static bool
 727 abnormal_ssa_name_p (tree exp)
 728 {
 729   if (!exp)
 730     return false;
 731
 732   if (TREE_CODE (exp) != SSA_NAME)
 733     return false;
 734
 735   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 736 }
 737
 738 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 739    abnormal phi node.  Callback for for_each_index.  */
 740
 741 static bool
 742 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 743                                   void *data ATTRIBUTE_UNUSED)
 744 {
 745   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 746     {
 747       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 748         return false;
 749       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 750         return false;
 751     }
 752
 753   return !abnormal_ssa_name_p (*index);
 754 }
 755
 756 /* Returns true if EXPR contains a ssa name that occurs in an
 757    abnormal phi node.  */
 758
 759 bool
 760 contains_abnormal_ssa_name_p (tree expr)
 761 {
 762   enum tree_code code;
 763   enum tree_code_class codeclass;
 764
 765   if (!expr)
 766     return false;
 767
 768   code = TREE_CODE (expr);
 769   codeclass = TREE_CODE_CLASS (code);
 770
 771   if (code == SSA_NAME)
 772     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 773
 774   if (code == INTEGER_CST
 775       || is_gimple_min_invariant (expr))
 776     return false;
 777
 778   if (code == ADDR_EXPR)
 779     return !for_each_index (&TREE_OPERAND (expr, 0),
 780                             idx_contains_abnormal_ssa_name_p,
 781                             NULL);
 782
 783   if (code == COND_EXPR)
 784     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 785       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 786       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 787
 788   switch (codeclass)
 789     {
 790     case tcc_binary:
 791     case tcc_comparison:
 792       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 793         return true;
 794
 795       /* Fallthru.  */
 796     case tcc_unary:
 797       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 798         return true;
 799
 800       break;
 801
 802     default:
 803       gcc_unreachable ();
 804     }
 805
 806   return false;
 807 }
 808
 809 /*  Returns the structure describing number of iterations determined from
 810     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 811
 812 static struct tree_niter_desc *
 813 niter_for_exit (struct ivopts_data *data, edge exit)
 814 {
 815   struct tree_niter_desc *desc;
 816   void **slot;
 817
 818   if (!data->niters)
 819     {
 820       data->niters = pointer_map_create ();
 821       slot = NULL;
 822     }
 823   else
 824     slot = pointer_map_contains (data->niters, exit);
 825
 826   if (!slot)
 827     {
 828       /* Try to determine number of iterations.  We cannot safely work with ssa
 829          names that appear in phi nodes on abnormal edges, so that we do not
 830          create overlapping life ranges for them (PR 27283).  */
 831       desc = XNEW (struct tree_niter_desc);
 832       if (!number_of_iterations_exit (data->current_loop,
 833                                       exit, desc, true)
 834           || contains_abnormal_ssa_name_p (desc->niter))
 835         {
 836           XDELETE (desc);
 837           desc = NULL;
 838         }
 839       slot = pointer_map_insert (data->niters, exit);
 840       *slot = desc;
 841     }
 842   else
 843     desc = (struct tree_niter_desc *) *slot;
 844
 845   return desc;
 846 }
 847
 848 /* Returns the structure describing number of iterations determined from
 849    single dominating exit of DATA->current_loop, or NULL if something
 850    goes wrong.  */
 851
 852 static struct tree_niter_desc *
 853 niter_for_single_dom_exit (struct ivopts_data *data)
 854 {
 855   edge exit = single_dom_exit (data->current_loop);
 856
 857   if (!exit)
 858     return NULL;
 859
 860   return niter_for_exit (data, exit);
 861 }
 862
 863 /* Initializes data structures used by the iv optimization pass, stored
 864    in DATA.  */
 865
 866 static void
 867 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 868 {
 869   data->version_info_size = 2 * num_ssa_names;
 870   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 871   data->relevant = BITMAP_ALLOC (NULL);
 872   data->important_candidates = BITMAP_ALLOC (NULL);
 873   data->max_inv_id = 0;
 874   data->niters = NULL;
 875   data->iv_uses.create (20);
 876   data->iv_candidates.create (20);
 877   data->inv_expr_tab.create (10);
 878   data->inv_expr_id = 0;
 879   decl_rtl_to_reset.create (20);
 880 }
 881
 882 /* Returns a memory object to that EXPR points.  In case we are able to
 883    determine that it does not point to any such object, NULL is returned.  */
 884
 885 static tree
 886 determine_base_object (tree expr)
 887 {
 888   enum tree_code code = TREE_CODE (expr);
 889   tree base, obj;
 890
 891   /* If this is a pointer casted to any type, we need to determine
 892      the base object for the pointer; so handle conversions before
 893      throwing away non-pointer expressions.  */
 894   if (CONVERT_EXPR_P (expr))
 895     return determine_base_object (TREE_OPERAND (expr, 0));
 896
 897   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 898     return NULL_TREE;
 899
 900   switch (code)
 901     {
 902     case INTEGER_CST:
 903       return NULL_TREE;
 904
 905     case ADDR_EXPR:
 906       obj = TREE_OPERAND (expr, 0);
 907       base = get_base_address (obj);
 908
 909       if (!base)
 910         return expr;
 911
 912       if (TREE_CODE (base) == MEM_REF)
 913         return determine_base_object (TREE_OPERAND (base, 0));
 914
 915       return fold_convert (ptr_type_node,
 916                            build_fold_addr_expr (base));
 917
 918     case POINTER_PLUS_EXPR:
 919       return determine_base_object (TREE_OPERAND (expr, 0));
 920
 921     case PLUS_EXPR:
 922     case MINUS_EXPR:
 923       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 924       gcc_unreachable ();
 925
 926     default:
 927       return fold_convert (ptr_type_node, expr);
 928     }
 929 }
 930
 931 /* Allocates an induction variable with given initial value BASE and step STEP
 932    for loop LOOP.  */
 933
 934 static struct iv *
 935 alloc_iv (tree base, tree step)
 936 {
 937   tree base_object = base;
 938   struct iv *iv = XCNEW (struct iv);
 939   gcc_assert (step != NULL_TREE);
 940
 941   /* Lower all address expressions except ones with DECL_P as operand.
 942      By doing this:
 943        1) More accurate cost can be computed for address expressions;
 944        2) Duplicate candidates won't be created for bases in different
 945           forms, like &a[0] and &a.  */
 946   STRIP_NOPS (base_object);
 947   if (TREE_CODE (base_object) == ADDR_EXPR
 948       && !DECL_P (TREE_OPERAND (base_object, 0)))
 949     {
 950       aff_tree comb;
 951       double_int size;
 952       base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
 953                                              &comb, &size);
 954       gcc_assert (base_object != NULL_TREE);
 955       base_object = build_fold_addr_expr (base_object);
 956       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
 957     }
 958
 959   iv->base = base;
 960   iv->base_object = determine_base_object (base_object);
 961   iv->step = step;
 962   iv->biv_p = false;
 963   iv->have_use_for = false;
 964   iv->use_id = 0;
 965   iv->ssa_name = NULL_TREE;
 966
 967   return iv;
 968 }
 969
 970 /* Sets STEP and BASE for induction variable IV.  */
 971
 972 static void
 973 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 974 {
 975   struct version_info *info = name_info (data, iv);
 976
 977   gcc_assert (!info->iv);
 978
 979   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 980   info->iv = alloc_iv (base, step);
 981   info->iv->ssa_name = iv;
 982 }
 983
 984 /* Finds induction variable declaration for VAR.  */
 985
 986 static struct iv *
 987 get_iv (struct ivopts_data *data, tree var)
 988 {
 989   basic_block bb;
 990   tree type = TREE_TYPE (var);
 991
 992   if (!POINTER_TYPE_P (type)
 993       && !INTEGRAL_TYPE_P (type))
 994     return NULL;
 995
 996   if (!name_info (data, var)->iv)
 997     {
 998       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
 999
1000       if (!bb
1001           || !flow_bb_inside_loop_p (data->current_loop, bb))
1002         set_iv (data, var, var, build_int_cst (type, 0));
1003     }
1004
1005   return name_info (data, var)->iv;
1006 }
1007
1008 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
1009    not define a simple affine biv with nonzero step.  */
1010
1011 static tree
1012 determine_biv_step (gimple phi)
1013 {
1014   struct loop *loop = gimple_bb (phi)->loop_father;
1015   tree name = PHI_RESULT (phi);
1016   affine_iv iv;
1017
1018   if (virtual_operand_p (name))
1019     return NULL_TREE;
1020
1021   if (!simple_iv (loop, loop, name, &iv, true))
1022     return NULL_TREE;
1023
1024   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
1025 }
1026
1027 /* Finds basic ivs.  */
1028
1029 static bool
1030 find_bivs (struct ivopts_data *data)
1031 {
1032   gimple phi;
1033   tree step, type, base;
1034   bool found = false;
1035   struct loop *loop = data->current_loop;
1036   gimple_stmt_iterator psi;
1037
1038   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1039     {
1040       phi = gsi_stmt (psi);
1041
1042       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1043         continue;
1044
1045       step = determine_biv_step (phi);
1046       if (!step)
1047         continue;
1048
1049       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1050       base = expand_simple_operations (base);
1051       if (contains_abnormal_ssa_name_p (base)
1052           || contains_abnormal_ssa_name_p (step))
1053         continue;
1054
1055       type = TREE_TYPE (PHI_RESULT (phi));
1056       base = fold_convert (type, base);
1057       if (step)
1058         {
1059           if (POINTER_TYPE_P (type))
1060             step = convert_to_ptrofftype (step);
1061           else
1062             step = fold_convert (type, step);
1063         }
1064
1065       set_iv (data, PHI_RESULT (phi), base, step);
1066       found = true;
1067     }
1068
1069   return found;
1070 }
1071
1072 /* Marks basic ivs.  */
1073
1074 static void
1075 mark_bivs (struct ivopts_data *data)
1076 {
1077   gimple phi, def;
1078   tree var;
1079   struct iv *iv, *incr_iv;
1080   struct loop *loop = data->current_loop;
1081   basic_block incr_bb;
1082   gimple_stmt_iterator psi;
1083
1084   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1085     {
1086       phi = gsi_stmt (psi);
1087
1088       iv = get_iv (data, PHI_RESULT (phi));
1089       if (!iv)
1090         continue;
1091
1092       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1093       def = SSA_NAME_DEF_STMT (var);
1094       /* Don't mark iv peeled from other one as biv.  */
1095       if (def
1096           && gimple_code (def) == GIMPLE_PHI
1097           && gimple_bb (def) == loop->header)
1098         continue;
1099
1100       incr_iv = get_iv (data, var);
1101       if (!incr_iv)
1102         continue;
1103
1104       /* If the increment is in the subloop, ignore it.  */
1105       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1106       if (incr_bb->loop_father != data->current_loop
1107           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1108         continue;
1109
1110       iv->biv_p = true;
1111       incr_iv->biv_p = true;
1112     }
1113 }
1114
1115 /* Checks whether STMT defines a linear induction variable and stores its
1116    parameters to IV.  */
1117
1118 static bool
1119 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1120 {
1121   tree lhs;
1122   struct loop *loop = data->current_loop;
1123
1124   iv->base = NULL_TREE;
1125   iv->step = NULL_TREE;
1126
1127   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1128     return false;
1129
1130   lhs = gimple_assign_lhs (stmt);
1131   if (TREE_CODE (lhs) != SSA_NAME)
1132     return false;
1133
1134   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1135     return false;
1136   iv->base = expand_simple_operations (iv->base);
1137
1138   if (contains_abnormal_ssa_name_p (iv->base)
1139       || contains_abnormal_ssa_name_p (iv->step))
1140     return false;
1141
1142   /* If STMT could throw, then do not consider STMT as defining a GIV.
1143      While this will suppress optimizations, we can not safely delete this
1144      GIV and associated statements, even if it appears it is not used.  */
1145   if (stmt_could_throw_p (stmt))
1146     return false;
1147
1148   return true;
1149 }
1150
1151 /* Finds general ivs in statement STMT.  */
1152
1153 static void
1154 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1155 {
1156   affine_iv iv;
1157
1158   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1159     return;
1160
1161   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1162 }
1163
1164 /* Finds general ivs in basic block BB.  */
1165
1166 static void
1167 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1168 {
1169   gimple_stmt_iterator bsi;
1170
1171   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1172     find_givs_in_stmt (data, gsi_stmt (bsi));
1173 }
1174
1175 /* Finds general ivs.  */
1176
1177 static void
1178 find_givs (struct ivopts_data *data)
1179 {
1180   struct loop *loop = data->current_loop;
1181   basic_block *body = get_loop_body_in_dom_order (loop);
1182   unsigned i;
1183
1184   for (i = 0; i < loop->num_nodes; i++)
1185     find_givs_in_bb (data, body[i]);
1186   free (body);
1187 }
1188
1189 /* For each ssa name defined in LOOP determines whether it is an induction
1190    variable and if so, its initial value and step.  */
1191
1192 static bool
1193 find_induction_variables (struct ivopts_data *data)
1194 {
1195   unsigned i;
1196   bitmap_iterator bi;
1197
1198   if (!find_bivs (data))
1199     return false;
1200
1201   find_givs (data);
1202   mark_bivs (data);
1203
1204   if (dump_file && (dump_flags & TDF_DETAILS))
1205     {
1206       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1207
1208       if (niter)
1209         {
1210           fprintf (dump_file, "  number of iterations ");
1211           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1212           if (!integer_zerop (niter->may_be_zero))
1213             {
1214               fprintf (dump_file, "; zero if ");
1215               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1216             }
1217           fprintf (dump_file, "\n\n");
1218         };
1219
1220       fprintf (dump_file, "Induction variables:\n\n");
1221
1222       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1223         {
1224           if (ver_info (data, i)->iv)
1225             dump_iv (dump_file, ver_info (data, i)->iv);
1226         }
1227     }
1228
1229   return true;
1230 }
1231
1232 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1233
1234 static struct iv_use *
1235 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1236             gimple stmt, enum use_type use_type)
1237 {
1238   struct iv_use *use = XCNEW (struct iv_use);
1239
1240   use->id = n_iv_uses (data);
1241   use->type = use_type;
1242   use->iv = iv;
1243   use->stmt = stmt;
1244   use->op_p = use_p;
1245   use->related_cands = BITMAP_ALLOC (NULL);
1246
1247   /* To avoid showing ssa name in the dumps, if it was not reset by the
1248      caller.  */
1249   iv->ssa_name = NULL_TREE;
1250
1251   if (dump_file && (dump_flags & TDF_DETAILS))
1252     dump_use (dump_file, use);
1253
1254   data->iv_uses.safe_push (use);
1255
1256   return use;
1257 }
1258
1259 /* Checks whether OP is a loop-level invariant and if so, records it.
1260    NONLINEAR_USE is true if the invariant is used in a way we do not
1261    handle specially.  */
1262
1263 static void
1264 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1265 {
1266   basic_block bb;
1267   struct version_info *info;
1268
1269   if (TREE_CODE (op) != SSA_NAME
1270       || virtual_operand_p (op))
1271     return;
1272
1273   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1274   if (bb
1275       && flow_bb_inside_loop_p (data->current_loop, bb))
1276     return;
1277
1278   info = name_info (data, op);
1279   info->name = op;
1280   info->has_nonlin_use |= nonlinear_use;
1281   if (!info->inv_id)
1282     info->inv_id = ++data->max_inv_id;
1283   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1284 }
1285
1286 /* Checks whether the use OP is interesting and if so, records it.  */
1287
1288 static struct iv_use *
1289 find_interesting_uses_op (struct ivopts_data *data, tree op)
1290 {
1291   struct iv *iv;
1292   struct iv *civ;
1293   gimple stmt;
1294   struct iv_use *use;
1295
1296   if (TREE_CODE (op) != SSA_NAME)
1297     return NULL;
1298
1299   iv = get_iv (data, op);
1300   if (!iv)
1301     return NULL;
1302
1303   if (iv->have_use_for)
1304     {
1305       use = iv_use (data, iv->use_id);
1306
1307       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1308       return use;
1309     }
1310
1311   if (integer_zerop (iv->step))
1312     {
1313       record_invariant (data, op, true);
1314       return NULL;
1315     }
1316   iv->have_use_for = true;
1317
1318   civ = XNEW (struct iv);
1319   *civ = *iv;
1320
1321   stmt = SSA_NAME_DEF_STMT (op);
1322   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1323               || is_gimple_assign (stmt));
1324
1325   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1326   iv->use_id = use->id;
1327
1328   return use;
1329 }
1330
1331 /* Given a condition in statement STMT, checks whether it is a compare
1332    of an induction variable and an invariant.  If this is the case,
1333    CONTROL_VAR is set to location of the iv, BOUND to the location of
1334    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1335    induction variable descriptions, and true is returned.  If this is not
1336    the case, CONTROL_VAR and BOUND are set to the arguments of the
1337    condition and false is returned.  */
1338
1339 static bool
1340 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1341                        tree **control_var, tree **bound,
1342                        struct iv **iv_var, struct iv **iv_bound)
1343 {
1344   /* The objects returned when COND has constant operands.  */
1345   static struct iv const_iv;
1346   static tree zero;
1347   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1348   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1349   bool ret = false;
1350
1351   if (gimple_code (stmt) == GIMPLE_COND)
1352     {
1353       op0 = gimple_cond_lhs_ptr (stmt);
1354       op1 = gimple_cond_rhs_ptr (stmt);
1355     }
1356   else
1357     {
1358       op0 = gimple_assign_rhs1_ptr (stmt);
1359       op1 = gimple_assign_rhs2_ptr (stmt);
1360     }
1361
1362   zero = integer_zero_node;
1363   const_iv.step = integer_zero_node;
1364
1365   if (TREE_CODE (*op0) == SSA_NAME)
1366     iv0 = get_iv (data, *op0);
1367   if (TREE_CODE (*op1) == SSA_NAME)
1368     iv1 = get_iv (data, *op1);
1369
1370   /* Exactly one of the compared values must be an iv, and the other one must
1371      be an invariant.  */
1372   if (!iv0 || !iv1)
1373     goto end;
1374
1375   if (integer_zerop (iv0->step))
1376     {
1377       /* Control variable may be on the other side.  */
1378       tmp_op = op0; op0 = op1; op1 = tmp_op;
1379       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1380     }
1381   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1382
1383 end:
1384   if (control_var)
1385     *control_var = op0;;
1386   if (iv_var)
1387     *iv_var = iv0;;
1388   if (bound)
1389     *bound = op1;
1390   if (iv_bound)
1391     *iv_bound = iv1;
1392
1393   return ret;
1394 }
1395
1396 /* Checks whether the condition in STMT is interesting and if so,
1397    records it.  */
1398
1399 static void
1400 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1401 {
1402   tree *var_p, *bound_p;
1403   struct iv *var_iv, *civ;
1404
1405   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1406     {
1407       find_interesting_uses_op (data, *var_p);
1408       find_interesting_uses_op (data, *bound_p);
1409       return;
1410     }
1411
1412   civ = XNEW (struct iv);
1413   *civ = *var_iv;
1414   record_use (data, NULL, civ, stmt, USE_COMPARE);
1415 }
1416
1417 /* Returns the outermost loop EXPR is obviously invariant in
1418    relative to the loop LOOP, i.e. if all its operands are defined
1419    outside of the returned loop.  Returns NULL if EXPR is not
1420    even obviously invariant in LOOP.  */
1421
1422 struct loop *
1423 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1424 {
1425   basic_block def_bb;
1426   unsigned i, len;
1427
1428   if (is_gimple_min_invariant (expr))
1429     return current_loops->tree_root;
1430
1431   if (TREE_CODE (expr) == SSA_NAME)
1432     {
1433       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1434       if (def_bb)
1435         {
1436           if (flow_bb_inside_loop_p (loop, def_bb))
1437             return NULL;
1438           return superloop_at_depth (loop,
1439                                      loop_depth (def_bb->loop_father) + 1);
1440         }
1441
1442       return current_loops->tree_root;
1443     }
1444
1445   if (!EXPR_P (expr))
1446     return NULL;
1447
1448   unsigned maxdepth = 0;
1449   len = TREE_OPERAND_LENGTH (expr);
1450   for (i = 0; i < len; i++)
1451     {
1452       struct loop *ivloop;
1453       if (!TREE_OPERAND (expr, i))
1454         continue;
1455
1456       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1457       if (!ivloop)
1458         return NULL;
1459       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1460     }
1461
1462   return superloop_at_depth (loop, maxdepth);
1463 }
1464
1465 /* Returns true if expression EXPR is obviously invariant in LOOP,
1466    i.e. if all its operands are defined outside of the LOOP.  LOOP
1467    should not be the function body.  */
1468
1469 bool
1470 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1471 {
1472   basic_block def_bb;
1473   unsigned i, len;
1474
1475   gcc_assert (loop_depth (loop) > 0);
1476
1477   if (is_gimple_min_invariant (expr))
1478     return true;
1479
1480   if (TREE_CODE (expr) == SSA_NAME)
1481     {
1482       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1483       if (def_bb
1484           && flow_bb_inside_loop_p (loop, def_bb))
1485         return false;
1486
1487       return true;
1488     }
1489
1490   if (!EXPR_P (expr))
1491     return false;
1492
1493   len = TREE_OPERAND_LENGTH (expr);
1494   for (i = 0; i < len; i++)
1495     if (TREE_OPERAND (expr, i)
1496         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1497       return false;
1498
1499   return true;
1500 }
1501
1502 /* Cumulates the steps of indices into DATA and replaces their values with the
1503    initial ones.  Returns false when the value of the index cannot be determined.
1504    Callback for for_each_index.  */
1505
1506 struct ifs_ivopts_data
1507 {
1508   struct ivopts_data *ivopts_data;
1509   gimple stmt;
1510   tree step;
1511 };
1512
1513 static bool
1514 idx_find_step (tree base, tree *idx, void *data)
1515 {
1516   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1517   struct iv *iv;
1518   tree step, iv_base, iv_step, lbound, off;
1519   struct loop *loop = dta->ivopts_data->current_loop;
1520
1521   /* If base is a component ref, require that the offset of the reference
1522      be invariant.  */
1523   if (TREE_CODE (base) == COMPONENT_REF)
1524     {
1525       off = component_ref_field_offset (base);
1526       return expr_invariant_in_loop_p (loop, off);
1527     }
1528
1529   /* If base is array, first check whether we will be able to move the
1530      reference out of the loop (in order to take its address in strength
1531      reduction).  In order for this to work we need both lower bound
1532      and step to be loop invariants.  */
1533   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1534     {
1535       /* Moreover, for a range, the size needs to be invariant as well.  */
1536       if (TREE_CODE (base) == ARRAY_RANGE_REF
1537           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1538         return false;
1539
1540       step = array_ref_element_size (base);
1541       lbound = array_ref_low_bound (base);
1542
1543       if (!expr_invariant_in_loop_p (loop, step)
1544           || !expr_invariant_in_loop_p (loop, lbound))
1545         return false;
1546     }
1547
1548   if (TREE_CODE (*idx) != SSA_NAME)
1549     return true;
1550
1551   iv = get_iv (dta->ivopts_data, *idx);
1552   if (!iv)
1553     return false;
1554
1555   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1556           *&x[0], which is not folded and does not trigger the
1557           ARRAY_REF path below.  */
1558   *idx = iv->base;
1559
1560   if (integer_zerop (iv->step))
1561     return true;
1562
1563   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1564     {
1565       step = array_ref_element_size (base);
1566
1567       /* We only handle addresses whose step is an integer constant.  */
1568       if (TREE_CODE (step) != INTEGER_CST)
1569         return false;
1570     }
1571   else
1572     /* The step for pointer arithmetics already is 1 byte.  */
1573     step = size_one_node;
1574
1575   iv_base = iv->base;
1576   iv_step = iv->step;
1577   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1578                             sizetype, &iv_base, &iv_step, dta->stmt,
1579                             false))
1580     {
1581       /* The index might wrap.  */
1582       return false;
1583     }
1584
1585   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1586   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1587
1588   return true;
1589 }
1590
1591 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1592    object is passed to it in DATA.  */
1593
1594 static bool
1595 idx_record_use (tree base, tree *idx,
1596                 void *vdata)
1597 {
1598   struct ivopts_data *data = (struct ivopts_data *) vdata;
1599   find_interesting_uses_op (data, *idx);
1600   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1601     {
1602       find_interesting_uses_op (data, array_ref_element_size (base));
1603       find_interesting_uses_op (data, array_ref_low_bound (base));
1604     }
1605   return true;
1606 }
1607
1608 /* If we can prove that TOP = cst * BOT for some constant cst,
1609    store cst to MUL and return true.  Otherwise return false.
1610    The returned value is always sign-extended, regardless of the
1611    signedness of TOP and BOT.  */
1612
1613 static bool
1614 constant_multiple_of (tree top, tree bot, double_int *mul)
1615 {
1616   tree mby;
1617   enum tree_code code;
1618   double_int res, p0, p1;
1619   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1620
1621   STRIP_NOPS (top);
1622   STRIP_NOPS (bot);
1623
1624   if (operand_equal_p (top, bot, 0))
1625     {
1626       *mul = double_int_one;
1627       return true;
1628     }
1629
1630   code = TREE_CODE (top);
1631   switch (code)
1632     {
1633     case MULT_EXPR:
1634       mby = TREE_OPERAND (top, 1);
1635       if (TREE_CODE (mby) != INTEGER_CST)
1636         return false;
1637
1638       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1639         return false;
1640
1641       *mul = (res * tree_to_double_int (mby)).sext (precision);
1642       return true;
1643
1644     case PLUS_EXPR:
1645     case MINUS_EXPR:
1646       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1647           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1648         return false;
1649
1650       if (code == MINUS_EXPR)
1651         p1 = -p1;
1652       *mul = (p0 + p1).sext (precision);
1653       return true;
1654
1655     case INTEGER_CST:
1656       if (TREE_CODE (bot) != INTEGER_CST)
1657         return false;
1658
1659       p0 = tree_to_double_int (top).sext (precision);
1660       p1 = tree_to_double_int (bot).sext (precision);
1661       if (p1.is_zero ())
1662         return false;
1663       *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
1664       return res.is_zero ();
1665
1666     default:
1667       return false;
1668     }
1669 }
1670
1671 /* Return true if memory reference REF with step STEP may be unaligned.  */
1672
1673 static bool
1674 may_be_unaligned_p (tree ref, tree step)
1675 {
1676   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1677      thus they are not misaligned.  */
1678   if (TREE_CODE (ref) == TARGET_MEM_REF)
1679     return false;
1680
1681   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1682
1683   unsigned HOST_WIDE_INT bitpos;
1684   unsigned int ref_align;
1685   get_object_alignment_1 (ref, &ref_align, &bitpos);
1686   if (ref_align < align
1687       || (bitpos % align) != 0
1688       || (bitpos % BITS_PER_UNIT) != 0)
1689     return true;
1690
1691   unsigned int trailing_zeros = tree_ctz (step);
1692   if (trailing_zeros < HOST_BITS_PER_INT
1693       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1694     return true;
1695
1696   return false;
1697 }
1698
1699 /* Return true if EXPR may be non-addressable.   */
1700
1701 bool
1702 may_be_nonaddressable_p (tree expr)
1703 {
1704   switch (TREE_CODE (expr))
1705     {
1706     case TARGET_MEM_REF:
1707       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1708          target, thus they are always addressable.  */
1709       return false;
1710
1711     case COMPONENT_REF:
1712       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1713              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1714
1715     case VIEW_CONVERT_EXPR:
1716       /* This kind of view-conversions may wrap non-addressable objects
1717          and make them look addressable.  After some processing the
1718          non-addressability may be uncovered again, causing ADDR_EXPRs
1719          of inappropriate objects to be built.  */
1720       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1721           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1722         return true;
1723
1724       /* ... fall through ... */
1725
1726     case ARRAY_REF:
1727     case ARRAY_RANGE_REF:
1728       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1729
1730     CASE_CONVERT:
1731       return true;
1732
1733     default:
1734       break;
1735     }
1736
1737   return false;
1738 }
1739
1740 /* Finds addresses in *OP_P inside STMT.  */
1741
1742 static void
1743 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1744 {
1745   tree base = *op_p, step = size_zero_node;
1746   struct iv *civ;
1747   struct ifs_ivopts_data ifs_ivopts_data;
1748
1749   /* Do not play with volatile memory references.  A bit too conservative,
1750      perhaps, but safe.  */
1751   if (gimple_has_volatile_ops (stmt))
1752     goto fail;
1753
1754   /* Ignore bitfields for now.  Not really something terribly complicated
1755      to handle.  TODO.  */
1756   if (TREE_CODE (base) == BIT_FIELD_REF)
1757     goto fail;
1758
1759   base = unshare_expr (base);
1760
1761   if (TREE_CODE (base) == TARGET_MEM_REF)
1762     {
1763       tree type = build_pointer_type (TREE_TYPE (base));
1764       tree astep;
1765
1766       if (TMR_BASE (base)
1767           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1768         {
1769           civ = get_iv (data, TMR_BASE (base));
1770           if (!civ)
1771             goto fail;
1772
1773           TMR_BASE (base) = civ->base;
1774           step = civ->step;
1775         }
1776       if (TMR_INDEX2 (base)
1777           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1778         {
1779           civ = get_iv (data, TMR_INDEX2 (base));
1780           if (!civ)
1781             goto fail;
1782
1783           TMR_INDEX2 (base) = civ->base;
1784           step = civ->step;
1785         }
1786       if (TMR_INDEX (base)
1787           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1788         {
1789           civ = get_iv (data, TMR_INDEX (base));
1790           if (!civ)
1791             goto fail;
1792
1793           TMR_INDEX (base) = civ->base;
1794           astep = civ->step;
1795
1796           if (astep)
1797             {
1798               if (TMR_STEP (base))
1799                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1800
1801               step = fold_build2 (PLUS_EXPR, type, step, astep);
1802             }
1803         }
1804
1805       if (integer_zerop (step))
1806         goto fail;
1807       base = tree_mem_ref_addr (type, base);
1808     }
1809   else
1810     {
1811       ifs_ivopts_data.ivopts_data = data;
1812       ifs_ivopts_data.stmt = stmt;
1813       ifs_ivopts_data.step = size_zero_node;
1814       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1815           || integer_zerop (ifs_ivopts_data.step))
1816         goto fail;
1817       step = ifs_ivopts_data.step;
1818
1819       /* Check that the base expression is addressable.  This needs
1820          to be done after substituting bases of IVs into it.  */
1821       if (may_be_nonaddressable_p (base))
1822         goto fail;
1823
1824       /* Moreover, on strict alignment platforms, check that it is
1825          sufficiently aligned.  */
1826       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1827         goto fail;
1828
1829       base = build_fold_addr_expr (base);
1830
1831       /* Substituting bases of IVs into the base expression might
1832          have caused folding opportunities.  */
1833       if (TREE_CODE (base) == ADDR_EXPR)
1834         {
1835           tree *ref = &TREE_OPERAND (base, 0);
1836           while (handled_component_p (*ref))
1837             ref = &TREE_OPERAND (*ref, 0);
1838           if (TREE_CODE (*ref) == MEM_REF)
1839             {
1840               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1841                                       TREE_OPERAND (*ref, 0),
1842                                       TREE_OPERAND (*ref, 1));
1843               if (tem)
1844                 *ref = tem;
1845             }
1846         }
1847     }
1848
1849   civ = alloc_iv (base, step);
1850   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1851   return;
1852
1853 fail:
1854   for_each_index (op_p, idx_record_use, data);
1855 }
1856
1857 /* Finds and records invariants used in STMT.  */
1858
1859 static void
1860 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1861 {
1862   ssa_op_iter iter;
1863   use_operand_p use_p;
1864   tree op;
1865
1866   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1867     {
1868       op = USE_FROM_PTR (use_p);
1869       record_invariant (data, op, false);
1870     }
1871 }
1872
1873 /* Finds interesting uses of induction variables in the statement STMT.  */
1874
1875 static void
1876 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1877 {
1878   struct iv *iv;
1879   tree op, *lhs, *rhs;
1880   ssa_op_iter iter;
1881   use_operand_p use_p;
1882   enum tree_code code;
1883
1884   find_invariants_stmt (data, stmt);
1885
1886   if (gimple_code (stmt) == GIMPLE_COND)
1887     {
1888       find_interesting_uses_cond (data, stmt);
1889       return;
1890     }
1891
1892   if (is_gimple_assign (stmt))
1893     {
1894       lhs = gimple_assign_lhs_ptr (stmt);
1895       rhs = gimple_assign_rhs1_ptr (stmt);
1896
1897       if (TREE_CODE (*lhs) == SSA_NAME)
1898         {
1899           /* If the statement defines an induction variable, the uses are not
1900              interesting by themselves.  */
1901
1902           iv = get_iv (data, *lhs);
1903
1904           if (iv && !integer_zerop (iv->step))
1905             return;
1906         }
1907
1908       code = gimple_assign_rhs_code (stmt);
1909       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1910           && (REFERENCE_CLASS_P (*rhs)
1911               || is_gimple_val (*rhs)))
1912         {
1913           if (REFERENCE_CLASS_P (*rhs))
1914             find_interesting_uses_address (data, stmt, rhs);
1915           else
1916             find_interesting_uses_op (data, *rhs);
1917
1918           if (REFERENCE_CLASS_P (*lhs))
1919             find_interesting_uses_address (data, stmt, lhs);
1920           return;
1921         }
1922       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1923         {
1924           find_interesting_uses_cond (data, stmt);
1925           return;
1926         }
1927
1928       /* TODO -- we should also handle address uses of type
1929
1930          memory = call (whatever);
1931
1932          and
1933
1934          call (memory).  */
1935     }
1936
1937   if (gimple_code (stmt) == GIMPLE_PHI
1938       && gimple_bb (stmt) == data->current_loop->header)
1939     {
1940       iv = get_iv (data, PHI_RESULT (stmt));
1941
1942       if (iv && !integer_zerop (iv->step))
1943         return;
1944     }
1945
1946   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1947     {
1948       op = USE_FROM_PTR (use_p);
1949
1950       if (TREE_CODE (op) != SSA_NAME)
1951         continue;
1952
1953       iv = get_iv (data, op);
1954       if (!iv)
1955         continue;
1956
1957       find_interesting_uses_op (data, op);
1958     }
1959 }
1960
1961 /* Finds interesting uses of induction variables outside of loops
1962    on loop exit edge EXIT.  */
1963
1964 static void
1965 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1966 {
1967   gimple phi;
1968   gimple_stmt_iterator psi;
1969   tree def;
1970
1971   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1972     {
1973       phi = gsi_stmt (psi);
1974       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1975       if (!virtual_operand_p (def))
1976         find_interesting_uses_op (data, def);
1977     }
1978 }
1979
1980 /* Finds uses of the induction variables that are interesting.  */
1981
1982 static void
1983 find_interesting_uses (struct ivopts_data *data)
1984 {
1985   basic_block bb;
1986   gimple_stmt_iterator bsi;
1987   basic_block *body = get_loop_body (data->current_loop);
1988   unsigned i;
1989   struct version_info *info;
1990   edge e;
1991
1992   if (dump_file && (dump_flags & TDF_DETAILS))
1993     fprintf (dump_file, "Uses:\n\n");
1994
1995   for (i = 0; i < data->current_loop->num_nodes; i++)
1996     {
1997       edge_iterator ei;
1998       bb = body[i];
1999
2000       FOR_EACH_EDGE (e, ei, bb->succs)
2001         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2002             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2003           find_interesting_uses_outside (data, e);
2004
2005       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2006         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2007       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2008         if (!is_gimple_debug (gsi_stmt (bsi)))
2009           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2010     }
2011
2012   if (dump_file && (dump_flags & TDF_DETAILS))
2013     {
2014       bitmap_iterator bi;
2015
2016       fprintf (dump_file, "\n");
2017
2018       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2019         {
2020           info = ver_info (data, i);
2021           if (info->inv_id)
2022             {
2023               fprintf (dump_file, "  ");
2024               print_generic_expr (dump_file, info->name, TDF_SLIM);
2025               fprintf (dump_file, " is invariant (%d)%s\n",
2026                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2027             }
2028         }
2029
2030       fprintf (dump_file, "\n");
2031     }
2032
2033   free (body);
2034 }
2035
2036 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2037    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2038    we are at the top-level of the processed address.  */
2039
2040 static tree
2041 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2042                 HOST_WIDE_INT *offset)
2043 {
2044   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2045   enum tree_code code;
2046   tree type, orig_type = TREE_TYPE (expr);
2047   HOST_WIDE_INT off0, off1, st;
2048   tree orig_expr = expr;
2049
2050   STRIP_NOPS (expr);
2051
2052   type = TREE_TYPE (expr);
2053   code = TREE_CODE (expr);
2054   *offset = 0;
2055
2056   switch (code)
2057     {
2058     case INTEGER_CST:
2059       if (!cst_and_fits_in_hwi (expr)
2060           || integer_zerop (expr))
2061         return orig_expr;
2062
2063       *offset = int_cst_value (expr);
2064       return build_int_cst (orig_type, 0);
2065
2066     case POINTER_PLUS_EXPR:
2067     case PLUS_EXPR:
2068     case MINUS_EXPR:
2069       op0 = TREE_OPERAND (expr, 0);
2070       op1 = TREE_OPERAND (expr, 1);
2071
2072       op0 = strip_offset_1 (op0, false, false, &off0);
2073       op1 = strip_offset_1 (op1, false, false, &off1);
2074
2075       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2076       if (op0 == TREE_OPERAND (expr, 0)
2077           && op1 == TREE_OPERAND (expr, 1))
2078         return orig_expr;
2079
2080       if (integer_zerop (op1))
2081         expr = op0;
2082       else if (integer_zerop (op0))
2083         {
2084           if (code == MINUS_EXPR)
2085             expr = fold_build1 (NEGATE_EXPR, type, op1);
2086           else
2087             expr = op1;
2088         }
2089       else
2090         expr = fold_build2 (code, type, op0, op1);
2091
2092       return fold_convert (orig_type, expr);
2093
2094     case MULT_EXPR:
2095       op1 = TREE_OPERAND (expr, 1);
2096       if (!cst_and_fits_in_hwi (op1))
2097         return orig_expr;
2098
2099       op0 = TREE_OPERAND (expr, 0);
2100       op0 = strip_offset_1 (op0, false, false, &off0);
2101       if (op0 == TREE_OPERAND (expr, 0))
2102         return orig_expr;
2103
2104       *offset = off0 * int_cst_value (op1);
2105       if (integer_zerop (op0))
2106         expr = op0;
2107       else
2108         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2109
2110       return fold_convert (orig_type, expr);
2111
2112     case ARRAY_REF:
2113     case ARRAY_RANGE_REF:
2114       if (!inside_addr)
2115         return orig_expr;
2116
2117       step = array_ref_element_size (expr);
2118       if (!cst_and_fits_in_hwi (step))
2119         break;
2120
2121       st = int_cst_value (step);
2122       op1 = TREE_OPERAND (expr, 1);
2123       op1 = strip_offset_1 (op1, false, false, &off1);
2124       *offset = off1 * st;
2125
2126       if (top_compref
2127           && integer_zerop (op1))
2128         {
2129           /* Strip the component reference completely.  */
2130           op0 = TREE_OPERAND (expr, 0);
2131           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2132           *offset += off0;
2133           return op0;
2134         }
2135       break;
2136
2137     case COMPONENT_REF:
2138       {
2139         tree field;
2140
2141         if (!inside_addr)
2142           return orig_expr;
2143
2144         tmp = component_ref_field_offset (expr);
2145         field = TREE_OPERAND (expr, 1);
2146         if (top_compref
2147             && cst_and_fits_in_hwi (tmp)
2148             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2149           {
2150             HOST_WIDE_INT boffset, abs_off;
2151
2152             /* Strip the component reference completely.  */
2153             op0 = TREE_OPERAND (expr, 0);
2154             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2155             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2156             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2157             if (boffset < 0)
2158               abs_off = -abs_off;
2159
2160             *offset = off0 + int_cst_value (tmp) + abs_off;
2161             return op0;
2162           }
2163       }
2164       break;
2165
2166     case ADDR_EXPR:
2167       op0 = TREE_OPERAND (expr, 0);
2168       op0 = strip_offset_1 (op0, true, true, &off0);
2169       *offset += off0;
2170
2171       if (op0 == TREE_OPERAND (expr, 0))
2172         return orig_expr;
2173
2174       expr = build_fold_addr_expr (op0);
2175       return fold_convert (orig_type, expr);
2176
2177     case MEM_REF:
2178       /* ???  Offset operand?  */
2179       inside_addr = false;
2180       break;
2181
2182     default:
2183       return orig_expr;
2184     }
2185
2186   /* Default handling of expressions for that we want to recurse into
2187      the first operand.  */
2188   op0 = TREE_OPERAND (expr, 0);
2189   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2190   *offset += off0;
2191
2192   if (op0 == TREE_OPERAND (expr, 0)
2193       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2194     return orig_expr;
2195
2196   expr = copy_node (expr);
2197   TREE_OPERAND (expr, 0) = op0;
2198   if (op1)
2199     TREE_OPERAND (expr, 1) = op1;
2200
2201   /* Inside address, we might strip the top level component references,
2202      thus changing type of the expression.  Handling of ADDR_EXPR
2203      will fix that.  */
2204   expr = fold_convert (orig_type, expr);
2205
2206   return expr;
2207 }
2208
2209 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2210
2211 static tree
2212 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2213 {
2214   HOST_WIDE_INT off;
2215   tree core = strip_offset_1 (expr, false, false, &off);
2216   *offset = off;
2217   return core;
2218 }
2219
2220 /* Returns variant of TYPE that can be used as base for different uses.
2221    We return unsigned type with the same precision, which avoids problems
2222    with overflows.  */
2223
2224 static tree
2225 generic_type_for (tree type)
2226 {
2227   if (POINTER_TYPE_P (type))
2228     return unsigned_type_for (type);
2229
2230   if (TYPE_UNSIGNED (type))
2231     return type;
2232
2233   return unsigned_type_for (type);
2234 }
2235
2236 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2237    the bitmap to that we should store it.  */
2238
2239 static struct ivopts_data *fd_ivopts_data;
2240 static tree
2241 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2242 {
2243   bitmap *depends_on = (bitmap *) data;
2244   struct version_info *info;
2245
2246   if (TREE_CODE (*expr_p) != SSA_NAME)
2247     return NULL_TREE;
2248   info = name_info (fd_ivopts_data, *expr_p);
2249
2250   if (!info->inv_id || info->has_nonlin_use)
2251     return NULL_TREE;
2252
2253   if (!*depends_on)
2254     *depends_on = BITMAP_ALLOC (NULL);
2255   bitmap_set_bit (*depends_on, info->inv_id);
2256
2257   return NULL_TREE;
2258 }
2259
2260 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2261    position to POS.  If USE is not NULL, the candidate is set as related to
2262    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2263    replacement of the final value of the iv by a direct computation.  */
2264
2265 static struct iv_cand *
2266 add_candidate_1 (struct ivopts_data *data,
2267                  tree base, tree step, bool important, enum iv_position pos,
2268                  struct iv_use *use, gimple incremented_at)
2269 {
2270   unsigned i;
2271   struct iv_cand *cand = NULL;
2272   tree type, orig_type;
2273
2274   /* For non-original variables, make sure their values are computed in a type
2275      that does not invoke undefined behavior on overflows (since in general,
2276      we cannot prove that these induction variables are non-wrapping).  */
2277   if (pos != IP_ORIGINAL)
2278     {
2279       orig_type = TREE_TYPE (base);
2280       type = generic_type_for (orig_type);
2281       if (type != orig_type)
2282         {
2283           base = fold_convert (type, base);
2284           step = fold_convert (type, step);
2285         }
2286     }
2287
2288   for (i = 0; i < n_iv_cands (data); i++)
2289     {
2290       cand = iv_cand (data, i);
2291
2292       if (cand->pos != pos)
2293         continue;
2294
2295       if (cand->incremented_at != incremented_at
2296           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2297               && cand->ainc_use != use))
2298         continue;
2299
2300       if (!cand->iv)
2301         {
2302           if (!base && !step)
2303             break;
2304
2305           continue;
2306         }
2307
2308       if (!base && !step)
2309         continue;
2310
2311       if (operand_equal_p (base, cand->iv->base, 0)
2312           && operand_equal_p (step, cand->iv->step, 0)
2313           && (TYPE_PRECISION (TREE_TYPE (base))
2314               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2315         break;
2316     }
2317
2318   if (i == n_iv_cands (data))
2319     {
2320       cand = XCNEW (struct iv_cand);
2321       cand->id = i;
2322
2323       if (!base && !step)
2324         cand->iv = NULL;
2325       else
2326         cand->iv = alloc_iv (base, step);
2327
2328       cand->pos = pos;
2329       if (pos != IP_ORIGINAL && cand->iv)
2330         {
2331           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2332           cand->var_after = cand->var_before;
2333         }
2334       cand->important = important;
2335       cand->incremented_at = incremented_at;
2336       data->iv_candidates.safe_push (cand);
2337
2338       if (step
2339           && TREE_CODE (step) != INTEGER_CST)
2340         {
2341           fd_ivopts_data = data;
2342           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2343         }
2344
2345       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2346         cand->ainc_use = use;
2347       else
2348         cand->ainc_use = NULL;
2349
2350       if (dump_file && (dump_flags & TDF_DETAILS))
2351         dump_cand (dump_file, cand);
2352     }
2353
2354   if (important && !cand->important)
2355     {
2356       cand->important = true;
2357       if (dump_file && (dump_flags & TDF_DETAILS))
2358         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2359     }
2360
2361   if (use)
2362     {
2363       bitmap_set_bit (use->related_cands, i);
2364       if (dump_file && (dump_flags & TDF_DETAILS))
2365         fprintf (dump_file, "Candidate %d is related to use %d\n",
2366                  cand->id, use->id);
2367     }
2368
2369   return cand;
2370 }
2371
2372 /* Returns true if incrementing the induction variable at the end of the LOOP
2373    is allowed.
2374
2375    The purpose is to avoid splitting latch edge with a biv increment, thus
2376    creating a jump, possibly confusing other optimization passes and leaving
2377    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2378    is not available (so we do not have a better alternative), or if the latch
2379    edge is already nonempty.  */
2380
2381 static bool
2382 allow_ip_end_pos_p (struct loop *loop)
2383 {
2384   if (!ip_normal_pos (loop))
2385     return true;
2386
2387   if (!empty_block_p (ip_end_pos (loop)))
2388     return true;
2389
2390   return false;
2391 }
2392
2393 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2394    Important field is set to IMPORTANT.  */
2395
2396 static void
2397 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2398                         bool important, struct iv_use *use)
2399 {
2400   basic_block use_bb = gimple_bb (use->stmt);
2401   enum machine_mode mem_mode;
2402   unsigned HOST_WIDE_INT cstepi;
2403
2404   /* If we insert the increment in any position other than the standard
2405      ones, we must ensure that it is incremented once per iteration.
2406      It must not be in an inner nested loop, or one side of an if
2407      statement.  */
2408   if (use_bb->loop_father != data->current_loop
2409       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2410       || stmt_could_throw_p (use->stmt)
2411       || !cst_and_fits_in_hwi (step))
2412     return;
2413
2414   cstepi = int_cst_value (step);
2415
2416   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2417   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2418         || USE_STORE_PRE_INCREMENT (mem_mode))
2419        && GET_MODE_SIZE (mem_mode) == cstepi)
2420       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2421            || USE_STORE_PRE_DECREMENT (mem_mode))
2422           && GET_MODE_SIZE (mem_mode) == -cstepi))
2423     {
2424       enum tree_code code = MINUS_EXPR;
2425       tree new_base;
2426       tree new_step = step;
2427
2428       if (POINTER_TYPE_P (TREE_TYPE (base)))
2429         {
2430           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2431           code = POINTER_PLUS_EXPR;
2432         }
2433       else
2434         new_step = fold_convert (TREE_TYPE (base), new_step);
2435       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2436       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2437                        use->stmt);
2438     }
2439   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2440         || USE_STORE_POST_INCREMENT (mem_mode))
2441        && GET_MODE_SIZE (mem_mode) == cstepi)
2442       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2443            || USE_STORE_POST_DECREMENT (mem_mode))
2444           && GET_MODE_SIZE (mem_mode) == -cstepi))
2445     {
2446       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2447                        use->stmt);
2448     }
2449 }
2450
2451 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2452    position to POS.  If USE is not NULL, the candidate is set as related to
2453    it.  The candidate computation is scheduled on all available positions.  */
2454
2455 static void
2456 add_candidate (struct ivopts_data *data,
2457                tree base, tree step, bool important, struct iv_use *use)
2458 {
2459   if (ip_normal_pos (data->current_loop))
2460     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2461   if (ip_end_pos (data->current_loop)
2462       && allow_ip_end_pos_p (data->current_loop))
2463     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2464
2465   if (use != NULL && use->type == USE_ADDRESS)
2466     add_autoinc_candidates (data, base, step, important, use);
2467 }
2468
2469 /* Adds standard iv candidates.  */
2470
2471 static void
2472 add_standard_iv_candidates (struct ivopts_data *data)
2473 {
2474   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2475
2476   /* The same for a double-integer type if it is still fast enough.  */
2477   if (TYPE_PRECISION
2478         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2479       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2480     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2481                    build_int_cst (long_integer_type_node, 1), true, NULL);
2482
2483   /* The same for a double-integer type if it is still fast enough.  */
2484   if (TYPE_PRECISION
2485         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2486       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2487     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2488                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2489 }
2490
2491
2492 /* Adds candidates bases on the old induction variable IV.  */
2493
2494 static void
2495 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2496 {
2497   gimple phi;
2498   tree def;
2499   struct iv_cand *cand;
2500
2501   add_candidate (data, iv->base, iv->step, true, NULL);
2502
2503   /* The same, but with initial value zero.  */
2504   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2505     add_candidate (data, size_int (0), iv->step, true, NULL);
2506   else
2507     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2508                    iv->step, true, NULL);
2509
2510   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2511   if (gimple_code (phi) == GIMPLE_PHI)
2512     {
2513       /* Additionally record the possibility of leaving the original iv
2514          untouched.  */
2515       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2516       /* Don't add candidate if it's from another PHI node because
2517          it's an affine iv appearing in the form of PEELED_CHREC.  */
2518       phi = SSA_NAME_DEF_STMT (def);
2519       if (gimple_code (phi) != GIMPLE_PHI)
2520         {
2521           cand = add_candidate_1 (data,
2522                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
2523                                   SSA_NAME_DEF_STMT (def));
2524           cand->var_before = iv->ssa_name;
2525           cand->var_after = def;
2526         }
2527       else
2528         gcc_assert (gimple_bb (phi) == data->current_loop->header);
2529     }
2530 }
2531
2532 /* Adds candidates based on the old induction variables.  */
2533
2534 static void
2535 add_old_ivs_candidates (struct ivopts_data *data)
2536 {
2537   unsigned i;
2538   struct iv *iv;
2539   bitmap_iterator bi;
2540
2541   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2542     {
2543       iv = ver_info (data, i)->iv;
2544       if (iv && iv->biv_p && !integer_zerop (iv->step))
2545         add_old_iv_candidates (data, iv);
2546     }
2547 }
2548
2549 /* Adds candidates based on the value of the induction variable IV and USE.  */
2550
2551 static void
2552 add_iv_value_candidates (struct ivopts_data *data,
2553                          struct iv *iv, struct iv_use *use)
2554 {
2555   unsigned HOST_WIDE_INT offset;
2556   tree base;
2557   tree basetype;
2558
2559   add_candidate (data, iv->base, iv->step, false, use);
2560
2561   /* The same, but with initial value zero.  Make such variable important,
2562      since it is generic enough so that possibly many uses may be based
2563      on it.  */
2564   basetype = TREE_TYPE (iv->base);
2565   if (POINTER_TYPE_P (basetype))
2566     basetype = sizetype;
2567   add_candidate (data, build_int_cst (basetype, 0),
2568                  iv->step, true, use);
2569
2570   /* Third, try removing the constant offset.  Make sure to even
2571      add a candidate for &a[0] vs. (T *)&a.  */
2572   base = strip_offset (iv->base, &offset);
2573   if (offset
2574       || base != iv->base)
2575     add_candidate (data, base, iv->step, false, use);
2576 }
2577
2578 /* Adds candidates based on the uses.  */
2579
2580 static void
2581 add_derived_ivs_candidates (struct ivopts_data *data)
2582 {
2583   unsigned i;
2584
2585   for (i = 0; i < n_iv_uses (data); i++)
2586     {
2587       struct iv_use *use = iv_use (data, i);
2588
2589       if (!use)
2590         continue;
2591
2592       switch (use->type)
2593         {
2594         case USE_NONLINEAR_EXPR:
2595         case USE_COMPARE:
2596         case USE_ADDRESS:
2597           /* Just add the ivs based on the value of the iv used here.  */
2598           add_iv_value_candidates (data, use->iv, use);
2599           break;
2600
2601         default:
2602           gcc_unreachable ();
2603         }
2604     }
2605 }
2606
2607 /* Record important candidates and add them to related_cands bitmaps
2608    if needed.  */
2609
2610 static void
2611 record_important_candidates (struct ivopts_data *data)
2612 {
2613   unsigned i;
2614   struct iv_use *use;
2615
2616   for (i = 0; i < n_iv_cands (data); i++)
2617     {
2618       struct iv_cand *cand = iv_cand (data, i);
2619
2620       if (cand->important)
2621         bitmap_set_bit (data->important_candidates, i);
2622     }
2623
2624   data->consider_all_candidates = (n_iv_cands (data)
2625                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2626
2627   if (data->consider_all_candidates)
2628     {
2629       /* We will not need "related_cands" bitmaps in this case,
2630          so release them to decrease peak memory consumption.  */
2631       for (i = 0; i < n_iv_uses (data); i++)
2632         {
2633           use = iv_use (data, i);
2634           BITMAP_FREE (use->related_cands);
2635         }
2636     }
2637   else
2638     {
2639       /* Add important candidates to the related_cands bitmaps.  */
2640       for (i = 0; i < n_iv_uses (data); i++)
2641         bitmap_ior_into (iv_use (data, i)->related_cands,
2642                          data->important_candidates);
2643     }
2644 }
2645
2646 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2647    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2648    we allocate a simple list to every use.  */
2649
2650 static void
2651 alloc_use_cost_map (struct ivopts_data *data)
2652 {
2653   unsigned i, size, s;
2654
2655   for (i = 0; i < n_iv_uses (data); i++)
2656     {
2657       struct iv_use *use = iv_use (data, i);
2658
2659       if (data->consider_all_candidates)
2660         size = n_iv_cands (data);
2661       else
2662         {
2663           s = bitmap_count_bits (use->related_cands);
2664
2665           /* Round up to the power of two, so that moduling by it is fast.  */
2666           size = s ? (1 << ceil_log2 (s)) : 1;
2667         }
2668
2669       use->n_map_members = size;
2670       use->cost_map = XCNEWVEC (struct cost_pair, size);
2671     }
2672 }
2673
2674 /* Returns description of computation cost of expression whose runtime
2675    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2676
2677 static comp_cost
2678 new_cost (unsigned runtime, unsigned complexity)
2679 {
2680   comp_cost cost;
2681
2682   cost.cost = runtime;
2683   cost.complexity = complexity;
2684
2685   return cost;
2686 }
2687
2688 /* Adds costs COST1 and COST2.  */
2689
2690 static comp_cost
2691 add_costs (comp_cost cost1, comp_cost cost2)
2692 {
2693   cost1.cost += cost2.cost;
2694   cost1.complexity += cost2.complexity;
2695
2696   return cost1;
2697 }
2698 /* Subtracts costs COST1 and COST2.  */
2699
2700 static comp_cost
2701 sub_costs (comp_cost cost1, comp_cost cost2)
2702 {
2703   cost1.cost -= cost2.cost;
2704   cost1.complexity -= cost2.complexity;
2705
2706   return cost1;
2707 }
2708
2709 /* Returns a negative number if COST1 < COST2, a positive number if
2710    COST1 > COST2, and 0 if COST1 = COST2.  */
2711
2712 static int
2713 compare_costs (comp_cost cost1, comp_cost cost2)
2714 {
2715   if (cost1.cost == cost2.cost)
2716     return cost1.complexity - cost2.complexity;
2717
2718   return cost1.cost - cost2.cost;
2719 }
2720
2721 /* Returns true if COST is infinite.  */
2722
2723 static bool
2724 infinite_cost_p (comp_cost cost)
2725 {
2726   return cost.cost == INFTY;
2727 }
2728
2729 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2730    on invariants DEPENDS_ON and that the value used in expressing it
2731    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
2732
2733 static void
2734 set_use_iv_cost (struct ivopts_data *data,
2735                  struct iv_use *use, struct iv_cand *cand,
2736                  comp_cost cost, bitmap depends_on, tree value,
2737                  enum tree_code comp, int inv_expr_id)
2738 {
2739   unsigned i, s;
2740
2741   if (infinite_cost_p (cost))
2742     {
2743       BITMAP_FREE (depends_on);
2744       return;
2745     }
2746
2747   if (data->consider_all_candidates)
2748     {
2749       use->cost_map[cand->id].cand = cand;
2750       use->cost_map[cand->id].cost = cost;
2751       use->cost_map[cand->id].depends_on = depends_on;
2752       use->cost_map[cand->id].value = value;
2753       use->cost_map[cand->id].comp = comp;
2754       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2755       return;
2756     }
2757
2758   /* n_map_members is a power of two, so this computes modulo.  */
2759   s = cand->id & (use->n_map_members - 1);
2760   for (i = s; i < use->n_map_members; i++)
2761     if (!use->cost_map[i].cand)
2762       goto found;
2763   for (i = 0; i < s; i++)
2764     if (!use->cost_map[i].cand)
2765       goto found;
2766
2767   gcc_unreachable ();
2768
2769 found:
2770   use->cost_map[i].cand = cand;
2771   use->cost_map[i].cost = cost;
2772   use->cost_map[i].depends_on = depends_on;
2773   use->cost_map[i].value = value;
2774   use->cost_map[i].comp = comp;
2775   use->cost_map[i].inv_expr_id = inv_expr_id;
2776 }
2777
2778 /* Gets cost of (USE, CANDIDATE) pair.  */
2779
2780 static struct cost_pair *
2781 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2782                  struct iv_cand *cand)
2783 {
2784   unsigned i, s;
2785   struct cost_pair *ret;
2786
2787   if (!cand)
2788     return NULL;
2789
2790   if (data->consider_all_candidates)
2791     {
2792       ret = use->cost_map + cand->id;
2793       if (!ret->cand)
2794         return NULL;
2795
2796       return ret;
2797     }
2798
2799   /* n_map_members is a power of two, so this computes modulo.  */
2800   s = cand->id & (use->n_map_members - 1);
2801   for (i = s; i < use->n_map_members; i++)
2802     if (use->cost_map[i].cand == cand)
2803       return use->cost_map + i;
2804     else if (use->cost_map[i].cand == NULL)
2805       return NULL;
2806   for (i = 0; i < s; i++)
2807     if (use->cost_map[i].cand == cand)
2808       return use->cost_map + i;
2809     else if (use->cost_map[i].cand == NULL)
2810       return NULL;
2811
2812   return NULL;
2813 }
2814
2815 /* Returns estimate on cost of computing SEQ.  */
2816
2817 static unsigned
2818 seq_cost (rtx seq, bool speed)
2819 {
2820   unsigned cost = 0;
2821   rtx set;
2822
2823   for (; seq; seq = NEXT_INSN (seq))
2824     {
2825       set = single_set (seq);
2826       if (set)
2827         cost += set_src_cost (SET_SRC (set), speed);
2828       else
2829         cost++;
2830     }
2831
2832   return cost;
2833 }
2834
2835 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2836 static rtx
2837 produce_memory_decl_rtl (tree obj, int *regno)
2838 {
2839   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2840   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2841   rtx x;
2842
2843   gcc_assert (obj);
2844   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2845     {
2846       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2847       x = gen_rtx_SYMBOL_REF (address_mode, name);
2848       SET_SYMBOL_REF_DECL (x, obj);
2849       x = gen_rtx_MEM (DECL_MODE (obj), x);
2850       set_mem_addr_space (x, as);
2851       targetm.encode_section_info (obj, x, true);
2852     }
2853   else
2854     {
2855       x = gen_raw_REG (address_mode, (*regno)++);
2856       x = gen_rtx_MEM (DECL_MODE (obj), x);
2857       set_mem_addr_space (x, as);
2858     }
2859
2860   return x;
2861 }
2862
2863 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2864    walk_tree.  DATA contains the actual fake register number.  */
2865
2866 static tree
2867 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2868 {
2869   tree obj = NULL_TREE;
2870   rtx x = NULL_RTX;
2871   int *regno = (int *) data;
2872
2873   switch (TREE_CODE (*expr_p))
2874     {
2875     case ADDR_EXPR:
2876       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2877            handled_component_p (*expr_p);
2878            expr_p = &TREE_OPERAND (*expr_p, 0))
2879         continue;
2880       obj = *expr_p;
2881       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2882         x = produce_memory_decl_rtl (obj, regno);
2883       break;
2884
2885     case SSA_NAME:
2886       *ws = 0;
2887       obj = SSA_NAME_VAR (*expr_p);
2888       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
2889       if (!obj)
2890         return NULL_TREE;
2891       if (!DECL_RTL_SET_P (obj))
2892         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2893       break;
2894
2895     case VAR_DECL:
2896     case PARM_DECL:
2897     case RESULT_DECL:
2898       *ws = 0;
2899       obj = *expr_p;
2900
2901       if (DECL_RTL_SET_P (obj))
2902         break;
2903
2904       if (DECL_MODE (obj) == BLKmode)
2905         x = produce_memory_decl_rtl (obj, regno);
2906       else
2907         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2908
2909       break;
2910
2911     default:
2912       break;
2913     }
2914
2915   if (x)
2916     {
2917       decl_rtl_to_reset.safe_push (obj);
2918       SET_DECL_RTL (obj, x);
2919     }
2920
2921   return NULL_TREE;
2922 }
2923
2924 /* Determines cost of the computation of EXPR.  */
2925
2926 static unsigned
2927 computation_cost (tree expr, bool speed)
2928 {
2929   rtx seq, rslt;
2930   tree type = TREE_TYPE (expr);
2931   unsigned cost;
2932   /* Avoid using hard regs in ways which may be unsupported.  */
2933   int regno = LAST_VIRTUAL_REGISTER + 1;
2934   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2935   enum node_frequency real_frequency = node->frequency;
2936
2937   node->frequency = NODE_FREQUENCY_NORMAL;
2938   crtl->maybe_hot_insn_p = speed;
2939   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2940   start_sequence ();
2941   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2942   seq = get_insns ();
2943   end_sequence ();
2944   default_rtl_profile ();
2945   node->frequency = real_frequency;
2946
2947   cost = seq_cost (seq, speed);
2948   if (MEM_P (rslt))
2949     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2950                           TYPE_ADDR_SPACE (type), speed);
2951   else if (!REG_P (rslt))
2952     cost += set_src_cost (rslt, speed);
2953
2954   return cost;
2955 }
2956
2957 /* Returns variable containing the value of candidate CAND at statement AT.  */
2958
2959 static tree
2960 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2961 {
2962   if (stmt_after_increment (loop, cand, stmt))
2963     return cand->var_after;
2964   else
2965     return cand->var_before;
2966 }
2967
2968 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2969    same precision that is at least as wide as the precision of TYPE, stores
2970    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2971    type of A and B.  */
2972
2973 static tree
2974 determine_common_wider_type (tree *a, tree *b)
2975 {
2976   tree wider_type = NULL;
2977   tree suba, subb;
2978   tree atype = TREE_TYPE (*a);
2979
2980   if (CONVERT_EXPR_P (*a))
2981     {
2982       suba = TREE_OPERAND (*a, 0);
2983       wider_type = TREE_TYPE (suba);
2984       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2985         return atype;
2986     }
2987   else
2988     return atype;
2989
2990   if (CONVERT_EXPR_P (*b))
2991     {
2992       subb = TREE_OPERAND (*b, 0);
2993       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2994         return atype;
2995     }
2996   else
2997     return atype;
2998
2999   *a = suba;
3000   *b = subb;
3001   return wider_type;
3002 }
3003
3004 /* Determines the expression by that USE is expressed from induction variable
3005    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3006    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3007
3008 static bool
3009 get_computation_aff (struct loop *loop,
3010                      struct iv_use *use, struct iv_cand *cand, gimple at,
3011                      struct aff_tree *aff)
3012 {
3013   tree ubase = use->iv->base;
3014   tree ustep = use->iv->step;
3015   tree cbase = cand->iv->base;
3016   tree cstep = cand->iv->step, cstep_common;
3017   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3018   tree common_type, var;
3019   tree uutype;
3020   aff_tree cbase_aff, var_aff;
3021   double_int rat;
3022
3023   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3024     {
3025       /* We do not have a precision to express the values of use.  */
3026       return false;
3027     }
3028
3029   var = var_at_stmt (loop, cand, at);
3030   uutype = unsigned_type_for (utype);
3031
3032   /* If the conversion is not noop, perform it.  */
3033   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3034     {
3035       cstep = fold_convert (uutype, cstep);
3036       cbase = fold_convert (uutype, cbase);
3037       var = fold_convert (uutype, var);
3038     }
3039
3040   if (!constant_multiple_of (ustep, cstep, &rat))
3041     return false;
3042
3043   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3044      type, we achieve better folding by computing their difference in this
3045      wider type, and cast the result to UUTYPE.  We do not need to worry about
3046      overflows, as all the arithmetics will in the end be performed in UUTYPE
3047      anyway.  */
3048   common_type = determine_common_wider_type (&ubase, &cbase);
3049
3050   /* use = ubase - ratio * cbase + ratio * var.  */
3051   tree_to_aff_combination (ubase, common_type, aff);
3052   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3053   tree_to_aff_combination (var, uutype, &var_aff);
3054
3055   /* We need to shift the value if we are after the increment.  */
3056   if (stmt_after_increment (loop, cand, at))
3057     {
3058       aff_tree cstep_aff;
3059
3060       if (common_type != uutype)
3061         cstep_common = fold_convert (common_type, cstep);
3062       else
3063         cstep_common = cstep;
3064
3065       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3066       aff_combination_add (&cbase_aff, &cstep_aff);
3067     }
3068
3069   aff_combination_scale (&cbase_aff, -rat);
3070   aff_combination_add (aff, &cbase_aff);
3071   if (common_type != uutype)
3072     aff_combination_convert (aff, uutype);
3073
3074   aff_combination_scale (&var_aff, rat);
3075   aff_combination_add (aff, &var_aff);
3076
3077   return true;
3078 }
3079
3080 /* Return the type of USE.  */
3081
3082 static tree
3083 get_use_type (struct iv_use *use)
3084 {
3085   tree base_type = TREE_TYPE (use->iv->base);
3086   tree type;
3087
3088   if (use->type == USE_ADDRESS)
3089     {
3090       /* The base_type may be a void pointer.  Create a pointer type based on
3091          the mem_ref instead.  */
3092       type = build_pointer_type (TREE_TYPE (*use->op_p));
3093       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3094                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3095     }
3096   else
3097     type = base_type;
3098
3099   return type;
3100 }
3101
3102 /* Determines the expression by that USE is expressed from induction variable
3103    CAND at statement AT in LOOP.  The computation is unshared.  */
3104
3105 static tree
3106 get_computation_at (struct loop *loop,
3107                     struct iv_use *use, struct iv_cand *cand, gimple at)
3108 {
3109   aff_tree aff;
3110   tree type = get_use_type (use);
3111
3112   if (!get_computation_aff (loop, use, cand, at, &aff))
3113     return NULL_TREE;
3114   unshare_aff_combination (&aff);
3115   return fold_convert (type, aff_combination_to_tree (&aff));
3116 }
3117
3118 /* Determines the expression by that USE is expressed from induction variable
3119    CAND in LOOP.  The computation is unshared.  */
3120
3121 static tree
3122 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3123 {
3124   return get_computation_at (loop, use, cand, use->stmt);
3125 }
3126
3127 /* Adjust the cost COST for being in loop setup rather than loop body.
3128    If we're optimizing for space, the loop setup overhead is constant;
3129    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3130 static unsigned
3131 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3132 {
3133   if (cost == INFTY)
3134     return cost;
3135   else if (optimize_loop_for_speed_p (data->current_loop))
3136     return cost / avg_loop_niter (data->current_loop);
3137   else
3138     return cost;
3139 }
3140
3141 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3142    validity for a memory reference accessing memory of mode MODE in
3143    address space AS.  */
3144
3145
3146 bool
3147 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3148                                  addr_space_t as)
3149 {
3150 #define MAX_RATIO 128
3151   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3152   static vec<sbitmap> valid_mult_list;
3153   sbitmap valid_mult;
3154
3155   if (data_index >= valid_mult_list.length ())
3156     valid_mult_list.safe_grow_cleared (data_index + 1);
3157
3158   valid_mult = valid_mult_list[data_index];
3159   if (!valid_mult)
3160     {
3161       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3162       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3163       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3164       rtx addr, scaled;
3165       HOST_WIDE_INT i;
3166
3167       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3168       bitmap_clear (valid_mult);
3169       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3170       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3171       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3172         {
3173           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3174           if (memory_address_addr_space_p (mode, addr, as)
3175               || memory_address_addr_space_p (mode, scaled, as))
3176             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3177         }
3178
3179       if (dump_file && (dump_flags & TDF_DETAILS))
3180         {
3181           fprintf (dump_file, "  allowed multipliers:");
3182           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3183             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3184               fprintf (dump_file, " %d", (int) i);
3185           fprintf (dump_file, "\n");
3186           fprintf (dump_file, "\n");
3187         }
3188
3189       valid_mult_list[data_index] = valid_mult;
3190     }
3191
3192   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3193     return false;
3194
3195   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3196 }
3197
3198 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3199    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3200    variable is omitted.  Compute the cost for a memory reference that accesses
3201    a memory location of mode MEM_MODE in address space AS.
3202
3203    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3204    size of MEM_MODE / RATIO) is available.  To make this determination, we
3205    look at the size of the increment to be made, which is given in CSTEP.
3206    CSTEP may be zero if the step is unknown.
3207    STMT_AFTER_INC is true iff the statement we're looking at is after the
3208    increment of the original biv.
3209
3210    TODO -- there must be some better way.  This all is quite crude.  */
3211
3212 enum ainc_type
3213 {
3214   AINC_PRE_INC,         /* Pre increment.  */
3215   AINC_PRE_DEC,         /* Pre decrement.  */
3216   AINC_POST_INC,        /* Post increment.  */
3217   AINC_POST_DEC,        /* Post decrement.  */
3218   AINC_NONE             /* Also the number of auto increment types.  */
3219 };
3220
3221 typedef struct address_cost_data_s
3222 {
3223   HOST_WIDE_INT min_offset, max_offset;
3224   unsigned costs[2][2][2][2];
3225   unsigned ainc_costs[AINC_NONE];
3226 } *address_cost_data;
3227
3228
3229 static comp_cost
3230 get_address_cost (bool symbol_present, bool var_present,
3231                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3232                   HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3233                   addr_space_t as, bool speed,
3234                   bool stmt_after_inc, bool *may_autoinc)
3235 {
3236   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3237   static vec<address_cost_data> address_cost_data_list;
3238   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3239   address_cost_data data;
3240   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3241   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3242   unsigned cost, acost, complexity;
3243   enum ainc_type autoinc_type;
3244   bool offset_p, ratio_p, autoinc;
3245   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3246   unsigned HOST_WIDE_INT mask;
3247   unsigned bits;
3248
3249   if (data_index >= address_cost_data_list.length ())
3250     address_cost_data_list.safe_grow_cleared (data_index + 1);
3251
3252   data = address_cost_data_list[data_index];
3253   if (!data)
3254     {
3255       HOST_WIDE_INT i;
3256       HOST_WIDE_INT rat, off = 0;
3257       int old_cse_not_expected, width;
3258       unsigned sym_p, var_p, off_p, rat_p, add_c;
3259       rtx seq, addr, base;
3260       rtx reg0, reg1;
3261
3262       data = (address_cost_data) xcalloc (1, sizeof (*data));
3263
3264       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3265
3266       width = GET_MODE_BITSIZE (address_mode) - 1;
3267       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3268         width = HOST_BITS_PER_WIDE_INT - 1;
3269       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3270
3271       for (i = width; i >= 0; i--)
3272         {
3273           off = -((unsigned HOST_WIDE_INT) 1 << i);
3274           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3275           if (memory_address_addr_space_p (mem_mode, addr, as))
3276             break;
3277         }
3278       data->min_offset = (i == -1? 0 : off);
3279
3280       for (i = width; i >= 0; i--)
3281         {
3282           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3283           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3284           if (memory_address_addr_space_p (mem_mode, addr, as))
3285             break;
3286         }
3287       if (i == -1)
3288         off = 0;
3289       data->max_offset = off;
3290
3291       if (dump_file && (dump_flags & TDF_DETAILS))
3292         {
3293           fprintf (dump_file, "get_address_cost:\n");
3294           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3295                    GET_MODE_NAME (mem_mode),
3296                    data->min_offset);
3297           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3298                    GET_MODE_NAME (mem_mode),
3299                    data->max_offset);
3300         }
3301
3302       rat = 1;
3303       for (i = 2; i <= MAX_RATIO; i++)
3304         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3305           {
3306             rat = i;
3307             break;
3308           }
3309
3310       /* Compute the cost of various addressing modes.  */
3311       acost = 0;
3312       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3313       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3314
3315       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3316           || USE_STORE_PRE_DECREMENT (mem_mode))
3317         {
3318           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3319           has_predec[mem_mode]
3320             = memory_address_addr_space_p (mem_mode, addr, as);
3321
3322           if (has_predec[mem_mode])
3323             data->ainc_costs[AINC_PRE_DEC]
3324               = address_cost (addr, mem_mode, as, speed);
3325         }
3326       if (USE_LOAD_POST_DECREMENT (mem_mode)
3327           || USE_STORE_POST_DECREMENT (mem_mode))
3328         {
3329           addr = gen_rtx_POST_DEC (address_mode, reg0);
3330           has_postdec[mem_mode]
3331             = memory_address_addr_space_p (mem_mode, addr, as);
3332
3333           if (has_postdec[mem_mode])
3334             data->ainc_costs[AINC_POST_DEC]
3335               = address_cost (addr, mem_mode, as, speed);
3336         }
3337       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3338           || USE_STORE_PRE_DECREMENT (mem_mode))
3339         {
3340           addr = gen_rtx_PRE_INC (address_mode, reg0);
3341           has_preinc[mem_mode]
3342             = memory_address_addr_space_p (mem_mode, addr, as);
3343
3344           if (has_preinc[mem_mode])
3345             data->ainc_costs[AINC_PRE_INC]
3346               = address_cost (addr, mem_mode, as, speed);
3347         }
3348       if (USE_LOAD_POST_INCREMENT (mem_mode)
3349           || USE_STORE_POST_INCREMENT (mem_mode))
3350         {
3351           addr = gen_rtx_POST_INC (address_mode, reg0);
3352           has_postinc[mem_mode]
3353             = memory_address_addr_space_p (mem_mode, addr, as);
3354
3355           if (has_postinc[mem_mode])
3356             data->ainc_costs[AINC_POST_INC]
3357               = address_cost (addr, mem_mode, as, speed);
3358         }
3359       for (i = 0; i < 16; i++)
3360         {
3361           sym_p = i & 1;
3362           var_p = (i >> 1) & 1;
3363           off_p = (i >> 2) & 1;
3364           rat_p = (i >> 3) & 1;
3365
3366           addr = reg0;
3367           if (rat_p)
3368             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3369                                    gen_int_mode (rat, address_mode));
3370
3371           if (var_p)
3372             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3373
3374           if (sym_p)
3375             {
3376               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3377               /* ??? We can run into trouble with some backends by presenting
3378                  it with symbols which haven't been properly passed through
3379                  targetm.encode_section_info.  By setting the local bit, we
3380                  enhance the probability of things working.  */
3381               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3382
3383               if (off_p)
3384                 base = gen_rtx_fmt_e (CONST, address_mode,
3385                                       gen_rtx_fmt_ee
3386                                         (PLUS, address_mode, base,
3387                                          gen_int_mode (off, address_mode)));
3388             }
3389           else if (off_p)
3390             base = gen_int_mode (off, address_mode);
3391           else
3392             base = NULL_RTX;
3393
3394           if (base)
3395             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3396
3397           start_sequence ();
3398           /* To avoid splitting addressing modes, pretend that no cse will
3399              follow.  */
3400           old_cse_not_expected = cse_not_expected;
3401           cse_not_expected = true;
3402           addr = memory_address_addr_space (mem_mode, addr, as);
3403           cse_not_expected = old_cse_not_expected;
3404           seq = get_insns ();
3405           end_sequence ();
3406
3407           acost = seq_cost (seq, speed);
3408           acost += address_cost (addr, mem_mode, as, speed);
3409
3410           if (!acost)
3411             acost = 1;
3412           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3413         }
3414
3415       /* On some targets, it is quite expensive to load symbol to a register,
3416          which makes addresses that contain symbols look much more expensive.
3417          However, the symbol will have to be loaded in any case before the
3418          loop (and quite likely we have it in register already), so it does not
3419          make much sense to penalize them too heavily.  So make some final
3420          tweaks for the SYMBOL_PRESENT modes:
3421
3422          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3423          var is cheaper, use this mode with small penalty.
3424          If VAR_PRESENT is true, try whether the mode with
3425          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3426          if this is the case, use it.  */
3427       add_c = add_cost (speed, address_mode);
3428       for (i = 0; i < 8; i++)
3429         {
3430           var_p = i & 1;
3431           off_p = (i >> 1) & 1;
3432           rat_p = (i >> 2) & 1;
3433
3434           acost = data->costs[0][1][off_p][rat_p] + 1;
3435           if (var_p)
3436             acost += add_c;
3437
3438           if (acost < data->costs[1][var_p][off_p][rat_p])
3439             data->costs[1][var_p][off_p][rat_p] = acost;
3440         }
3441
3442       if (dump_file && (dump_flags & TDF_DETAILS))
3443         {
3444           fprintf (dump_file, "Address costs:\n");
3445
3446           for (i = 0; i < 16; i++)
3447             {
3448               sym_p = i & 1;
3449               var_p = (i >> 1) & 1;
3450               off_p = (i >> 2) & 1;
3451               rat_p = (i >> 3) & 1;
3452
3453               fprintf (dump_file, "  ");
3454               if (sym_p)
3455                 fprintf (dump_file, "sym + ");
3456               if (var_p)
3457                 fprintf (dump_file, "var + ");
3458               if (off_p)
3459                 fprintf (dump_file, "cst + ");
3460               if (rat_p)
3461                 fprintf (dump_file, "rat * ");
3462
3463               acost = data->costs[sym_p][var_p][off_p][rat_p];
3464               fprintf (dump_file, "index costs %d\n", acost);
3465             }
3466           if (has_predec[mem_mode] || has_postdec[mem_mode]
3467               || has_preinc[mem_mode] || has_postinc[mem_mode])
3468             fprintf (dump_file, "  May include autoinc/dec\n");
3469           fprintf (dump_file, "\n");
3470         }
3471
3472       address_cost_data_list[data_index] = data;
3473     }
3474
3475   bits = GET_MODE_BITSIZE (address_mode);
3476   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3477   offset &= mask;
3478   if ((offset >> (bits - 1) & 1))
3479     offset |= ~mask;
3480   s_offset = offset;
3481
3482   autoinc = false;
3483   autoinc_type = AINC_NONE;
3484   msize = GET_MODE_SIZE (mem_mode);
3485   autoinc_offset = offset;
3486   if (stmt_after_inc)
3487     autoinc_offset += ratio * cstep;
3488   if (symbol_present || var_present || ratio != 1)
3489     autoinc = false;
3490   else
3491     {
3492       if (has_postinc[mem_mode] && autoinc_offset == 0
3493           && msize == cstep)
3494         autoinc_type = AINC_POST_INC;
3495       else if (has_postdec[mem_mode] && autoinc_offset == 0
3496                && msize == -cstep)
3497         autoinc_type = AINC_POST_DEC;
3498       else if (has_preinc[mem_mode] && autoinc_offset == msize
3499                && msize == cstep)
3500         autoinc_type = AINC_PRE_INC;
3501       else if (has_predec[mem_mode] && autoinc_offset == -msize
3502                && msize == -cstep)
3503         autoinc_type = AINC_PRE_DEC;
3504
3505       if (autoinc_type != AINC_NONE)
3506         autoinc = true;
3507     }
3508
3509   cost = 0;
3510   offset_p = (s_offset != 0
3511               && data->min_offset <= s_offset
3512               && s_offset <= data->max_offset);
3513   ratio_p = (ratio != 1
3514              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3515
3516   if (ratio != 1 && !ratio_p)
3517     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3518
3519   if (s_offset && !offset_p && !symbol_present)
3520     cost += add_cost (speed, address_mode);
3521
3522   if (may_autoinc)
3523     *may_autoinc = autoinc;
3524   if (autoinc)
3525     acost = data->ainc_costs[autoinc_type];
3526   else
3527     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3528   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3529   return new_cost (cost + acost, complexity);
3530 }
3531
3532  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3533     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3534     calculating the operands of EXPR.  Returns true if successful, and returns
3535     the cost in COST.  */
3536
3537 static bool
3538 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3539                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3540 {
3541   comp_cost res;
3542   tree op1 = TREE_OPERAND (expr, 1);
3543   tree cst = TREE_OPERAND (mult, 1);
3544   tree multop = TREE_OPERAND (mult, 0);
3545   int m = exact_log2 (int_cst_value (cst));
3546   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3547   int sa_cost;
3548   bool equal_p = false;
3549
3550   if (!(m >= 0 && m < maxm))
3551     return false;
3552
3553   if (operand_equal_p (op1, mult, 0))
3554     equal_p = true;
3555
3556   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3557              ? shiftadd_cost (speed, mode, m)
3558              : (equal_p
3559                 ? shiftsub1_cost (speed, mode, m)
3560                 : shiftsub0_cost (speed, mode, m)));
3561   res = new_cost (sa_cost, 0);
3562   res = add_costs (res, equal_p ? cost0 : cost1);
3563
3564   STRIP_NOPS (multop);
3565   if (!is_gimple_val (multop))
3566     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3567
3568   *cost = res;
3569   return true;
3570 }
3571
3572 /* Estimates cost of forcing expression EXPR into a variable.  */
3573
3574 static comp_cost
3575 force_expr_to_var_cost (tree expr, bool speed)
3576 {
3577   static bool costs_initialized = false;
3578   static unsigned integer_cost [2];
3579   static unsigned symbol_cost [2];
3580   static unsigned address_cost [2];
3581   tree op0, op1;
3582   comp_cost cost0, cost1, cost;
3583   enum machine_mode mode;
3584
3585   if (!costs_initialized)
3586     {
3587       tree type = build_pointer_type (integer_type_node);
3588       tree var, addr;
3589       rtx x;
3590       int i;
3591
3592       var = create_tmp_var_raw (integer_type_node, "test_var");
3593       TREE_STATIC (var) = 1;
3594       x = produce_memory_decl_rtl (var, NULL);
3595       SET_DECL_RTL (var, x);
3596
3597       addr = build1 (ADDR_EXPR, type, var);
3598
3599
3600       for (i = 0; i < 2; i++)
3601         {
3602           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3603                                                              2000), i);
3604
3605           symbol_cost[i] = computation_cost (addr, i) + 1;
3606
3607           address_cost[i]
3608             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3609           if (dump_file && (dump_flags & TDF_DETAILS))
3610             {
3611               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3612               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3613               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3614               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3615               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3616               fprintf (dump_file, "\n");
3617             }
3618         }
3619
3620       costs_initialized = true;
3621     }
3622
3623   STRIP_NOPS (expr);
3624
3625   if (SSA_VAR_P (expr))
3626     return no_cost;
3627
3628   if (is_gimple_min_invariant (expr))
3629     {
3630       if (TREE_CODE (expr) == INTEGER_CST)
3631         return new_cost (integer_cost [speed], 0);
3632
3633       if (TREE_CODE (expr) == ADDR_EXPR)
3634         {
3635           tree obj = TREE_OPERAND (expr, 0);
3636
3637           if (TREE_CODE (obj) == VAR_DECL
3638               || TREE_CODE (obj) == PARM_DECL
3639               || TREE_CODE (obj) == RESULT_DECL)
3640             return new_cost (symbol_cost [speed], 0);
3641         }
3642
3643       return new_cost (address_cost [speed], 0);
3644     }
3645
3646   switch (TREE_CODE (expr))
3647     {
3648     case POINTER_PLUS_EXPR:
3649     case PLUS_EXPR:
3650     case MINUS_EXPR:
3651     case MULT_EXPR:
3652       op0 = TREE_OPERAND (expr, 0);
3653       op1 = TREE_OPERAND (expr, 1);
3654       STRIP_NOPS (op0);
3655       STRIP_NOPS (op1);
3656       break;
3657
3658     CASE_CONVERT:
3659     case NEGATE_EXPR:
3660       op0 = TREE_OPERAND (expr, 0);
3661       STRIP_NOPS (op0);
3662       op1 = NULL_TREE;
3663       break;
3664
3665     default:
3666       /* Just an arbitrary value, FIXME.  */
3667       return new_cost (target_spill_cost[speed], 0);
3668     }
3669
3670   if (op0 == NULL_TREE
3671       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3672     cost0 = no_cost;
3673   else
3674     cost0 = force_expr_to_var_cost (op0, speed);
3675
3676   if (op1 == NULL_TREE
3677       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3678     cost1 = no_cost;
3679   else
3680     cost1 = force_expr_to_var_cost (op1, speed);
3681
3682   mode = TYPE_MODE (TREE_TYPE (expr));
3683   switch (TREE_CODE (expr))
3684     {
3685     case POINTER_PLUS_EXPR:
3686     case PLUS_EXPR:
3687     case MINUS_EXPR:
3688     case NEGATE_EXPR:
3689       cost = new_cost (add_cost (speed, mode), 0);
3690       if (TREE_CODE (expr) != NEGATE_EXPR)
3691         {
3692           tree mult = NULL_TREE;
3693           comp_cost sa_cost;
3694           if (TREE_CODE (op1) == MULT_EXPR)
3695             mult = op1;
3696           else if (TREE_CODE (op0) == MULT_EXPR)
3697             mult = op0;
3698
3699           if (mult != NULL_TREE
3700               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3701               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3702                                     speed, &sa_cost))
3703             return sa_cost;
3704         }
3705       break;
3706
3707     CASE_CONVERT:
3708       {
3709         tree inner_mode, outer_mode;
3710         outer_mode = TREE_TYPE (expr);
3711         inner_mode = TREE_TYPE (op0);
3712         cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3713                                        TYPE_MODE (inner_mode), speed), 0);
3714       }
3715       break;
3716
3717     case MULT_EXPR:
3718       if (cst_and_fits_in_hwi (op0))
3719         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3720                                              mode, speed), 0);
3721       else if (cst_and_fits_in_hwi (op1))
3722         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3723                                              mode, speed), 0);
3724       else
3725         return new_cost (target_spill_cost [speed], 0);
3726       break;
3727
3728     default:
3729       gcc_unreachable ();
3730     }
3731
3732   cost = add_costs (cost, cost0);
3733   cost = add_costs (cost, cost1);
3734
3735   /* Bound the cost by target_spill_cost.  The parts of complicated
3736      computations often are either loop invariant or at least can
3737      be shared between several iv uses, so letting this grow without
3738      limits would not give reasonable results.  */
3739   if (cost.cost > (int) target_spill_cost [speed])
3740     cost.cost = target_spill_cost [speed];
3741
3742   return cost;
3743 }
3744
3745 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3746    invariants the computation depends on.  */
3747
3748 static comp_cost
3749 force_var_cost (struct ivopts_data *data,
3750                 tree expr, bitmap *depends_on)
3751 {
3752   if (depends_on)
3753     {
3754       fd_ivopts_data = data;
3755       walk_tree (&expr, find_depends, depends_on, NULL);
3756     }
3757
3758   return force_expr_to_var_cost (expr, data->speed);
3759 }
3760
3761 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3762    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3763    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3764    invariants the computation depends on.  */
3765
3766 static comp_cost
3767 split_address_cost (struct ivopts_data *data,
3768                     tree addr, bool *symbol_present, bool *var_present,
3769                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3770 {
3771   tree core;
3772   HOST_WIDE_INT bitsize;
3773   HOST_WIDE_INT bitpos;
3774   tree toffset;
3775   enum machine_mode mode;
3776   int unsignedp, volatilep;
3777
3778   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3779                               &unsignedp, &volatilep, false);
3780
3781   if (toffset != 0
3782       || bitpos % BITS_PER_UNIT != 0
3783       || TREE_CODE (core) != VAR_DECL)
3784     {
3785       *symbol_present = false;
3786       *var_present = true;
3787       fd_ivopts_data = data;
3788       walk_tree (&addr, find_depends, depends_on, NULL);
3789       return new_cost (target_spill_cost[data->speed], 0);
3790     }
3791
3792   *offset += bitpos / BITS_PER_UNIT;
3793   if (TREE_STATIC (core)
3794       || DECL_EXTERNAL (core))
3795     {
3796       *symbol_present = true;
3797       *var_present = false;
3798       return no_cost;
3799     }
3800
3801   *symbol_present = false;
3802   *var_present = true;
3803   return no_cost;
3804 }
3805
3806 /* Estimates cost of expressing difference of addresses E1 - E2 as
3807    var + symbol + offset.  The value of offset is added to OFFSET,
3808    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3809    part is missing.  DEPENDS_ON is a set of the invariants the computation
3810    depends on.  */
3811
3812 static comp_cost
3813 ptr_difference_cost (struct ivopts_data *data,
3814                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3815                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3816 {
3817   HOST_WIDE_INT diff = 0;
3818   aff_tree aff_e1, aff_e2;
3819   tree type;
3820
3821   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3822
3823   if (ptr_difference_const (e1, e2, &diff))
3824     {
3825       *offset += diff;
3826       *symbol_present = false;
3827       *var_present = false;
3828       return no_cost;
3829     }
3830
3831   if (integer_zerop (e2))
3832     return split_address_cost (data, TREE_OPERAND (e1, 0),
3833                                symbol_present, var_present, offset, depends_on);
3834
3835   *symbol_present = false;
3836   *var_present = true;
3837
3838   type = signed_type_for (TREE_TYPE (e1));
3839   tree_to_aff_combination (e1, type, &aff_e1);
3840   tree_to_aff_combination (e2, type, &aff_e2);
3841   aff_combination_scale (&aff_e2, double_int_minus_one);
3842   aff_combination_add (&aff_e1, &aff_e2);
3843
3844   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3845 }
3846
3847 /* Estimates cost of expressing difference E1 - E2 as
3848    var + symbol + offset.  The value of offset is added to OFFSET,
3849    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3850    part is missing.  DEPENDS_ON is a set of the invariants the computation
3851    depends on.  */
3852
3853 static comp_cost
3854 difference_cost (struct ivopts_data *data,
3855                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3856                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3857 {
3858   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3859   unsigned HOST_WIDE_INT off1, off2;
3860   aff_tree aff_e1, aff_e2;
3861   tree type;
3862
3863   e1 = strip_offset (e1, &off1);
3864   e2 = strip_offset (e2, &off2);
3865   *offset += off1 - off2;
3866
3867   STRIP_NOPS (e1);
3868   STRIP_NOPS (e2);
3869
3870   if (TREE_CODE (e1) == ADDR_EXPR)
3871     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3872                                 offset, depends_on);
3873   *symbol_present = false;
3874
3875   if (operand_equal_p (e1, e2, 0))
3876     {
3877       *var_present = false;
3878       return no_cost;
3879     }
3880
3881   *var_present = true;
3882
3883   if (integer_zerop (e2))
3884     return force_var_cost (data, e1, depends_on);
3885
3886   if (integer_zerop (e1))
3887     {
3888       comp_cost cost = force_var_cost (data, e2, depends_on);
3889       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3890       return cost;
3891     }
3892
3893   type = signed_type_for (TREE_TYPE (e1));
3894   tree_to_aff_combination (e1, type, &aff_e1);
3895   tree_to_aff_combination (e2, type, &aff_e2);
3896   aff_combination_scale (&aff_e2, double_int_minus_one);
3897   aff_combination_add (&aff_e1, &aff_e2);
3898
3899   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3900 }
3901
3902 /* Returns true if AFF1 and AFF2 are identical.  */
3903
3904 static bool
3905 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3906 {
3907   unsigned i;
3908
3909   if (aff1->n != aff2->n)
3910     return false;
3911
3912   for (i = 0; i < aff1->n; i++)
3913     {
3914       if (aff1->elts[i].coef != aff2->elts[i].coef)
3915         return false;
3916
3917       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3918         return false;
3919     }
3920   return true;
3921 }
3922
3923 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
3924
3925 static int
3926 get_expr_id (struct ivopts_data *data, tree expr)
3927 {
3928   struct iv_inv_expr_ent ent;
3929   struct iv_inv_expr_ent **slot;
3930
3931   ent.expr = expr;
3932   ent.hash = iterative_hash_expr (expr, 0);
3933   slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3934   if (*slot)
3935     return (*slot)->id;
3936
3937   *slot = XNEW (struct iv_inv_expr_ent);
3938   (*slot)->expr = expr;
3939   (*slot)->hash = ent.hash;
3940   (*slot)->id = data->inv_expr_id++;
3941   return (*slot)->id;
3942 }
3943
3944 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3945    requires a new compiler generated temporary.  Returns -1 otherwise.
3946    ADDRESS_P is a flag indicating if the expression is for address
3947    computation.  */
3948
3949 static int
3950 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3951                             tree cbase, HOST_WIDE_INT ratio,
3952                             bool address_p)
3953 {
3954   aff_tree ubase_aff, cbase_aff;
3955   tree expr, ub, cb;
3956
3957   STRIP_NOPS (ubase);
3958   STRIP_NOPS (cbase);
3959   ub = ubase;
3960   cb = cbase;
3961
3962   if ((TREE_CODE (ubase) == INTEGER_CST)
3963       && (TREE_CODE (cbase) == INTEGER_CST))
3964     return -1;
3965
3966   /* Strips the constant part. */
3967   if (TREE_CODE (ubase) == PLUS_EXPR
3968       || TREE_CODE (ubase) == MINUS_EXPR
3969       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3970     {
3971       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3972         ubase = TREE_OPERAND (ubase, 0);
3973     }
3974
3975   /* Strips the constant part. */
3976   if (TREE_CODE (cbase) == PLUS_EXPR
3977       || TREE_CODE (cbase) == MINUS_EXPR
3978       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3979     {
3980       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3981         cbase = TREE_OPERAND (cbase, 0);
3982     }
3983
3984   if (address_p)
3985     {
3986       if (((TREE_CODE (ubase) == SSA_NAME)
3987            || (TREE_CODE (ubase) == ADDR_EXPR
3988                && is_gimple_min_invariant (ubase)))
3989           && (TREE_CODE (cbase) == INTEGER_CST))
3990         return -1;
3991
3992       if (((TREE_CODE (cbase) == SSA_NAME)
3993            || (TREE_CODE (cbase) == ADDR_EXPR
3994                && is_gimple_min_invariant (cbase)))
3995           && (TREE_CODE (ubase) == INTEGER_CST))
3996         return -1;
3997     }
3998
3999   if (ratio == 1)
4000     {
4001       if (operand_equal_p (ubase, cbase, 0))
4002         return -1;
4003
4004       if (TREE_CODE (ubase) == ADDR_EXPR
4005           && TREE_CODE (cbase) == ADDR_EXPR)
4006         {
4007           tree usym, csym;
4008
4009           usym = TREE_OPERAND (ubase, 0);
4010           csym = TREE_OPERAND (cbase, 0);
4011           if (TREE_CODE (usym) == ARRAY_REF)
4012             {
4013               tree ind = TREE_OPERAND (usym, 1);
4014               if (TREE_CODE (ind) == INTEGER_CST
4015                   && tree_fits_shwi_p (ind)
4016                   && tree_to_shwi (ind) == 0)
4017                 usym = TREE_OPERAND (usym, 0);
4018             }
4019           if (TREE_CODE (csym) == ARRAY_REF)
4020             {
4021               tree ind = TREE_OPERAND (csym, 1);
4022               if (TREE_CODE (ind) == INTEGER_CST
4023                   && tree_fits_shwi_p (ind)
4024                   && tree_to_shwi (ind) == 0)
4025                 csym = TREE_OPERAND (csym, 0);
4026             }
4027           if (operand_equal_p (usym, csym, 0))
4028             return -1;
4029         }
4030       /* Now do more complex comparison  */
4031       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4032       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4033       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4034         return -1;
4035     }
4036
4037   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4038   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4039
4040   aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
4041   aff_combination_add (&ubase_aff, &cbase_aff);
4042   expr = aff_combination_to_tree (&ubase_aff);
4043   return get_expr_id (data, expr);
4044 }
4045
4046
4047
4048 /* Determines the cost of the computation by that USE is expressed
4049    from induction variable CAND.  If ADDRESS_P is true, we just need
4050    to create an address from it, otherwise we want to get it into
4051    register.  A set of invariants we depend on is stored in
4052    DEPENDS_ON.  AT is the statement at that the value is computed.
4053    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4054    addressing is likely.  */
4055
4056 static comp_cost
4057 get_computation_cost_at (struct ivopts_data *data,
4058                          struct iv_use *use, struct iv_cand *cand,
4059                          bool address_p, bitmap *depends_on, gimple at,
4060                          bool *can_autoinc,
4061                          int *inv_expr_id)
4062 {
4063   tree ubase = use->iv->base, ustep = use->iv->step;
4064   tree cbase, cstep;
4065   tree utype = TREE_TYPE (ubase), ctype;
4066   unsigned HOST_WIDE_INT cstepi, offset = 0;
4067   HOST_WIDE_INT ratio, aratio;
4068   bool var_present, symbol_present, stmt_is_after_inc;
4069   comp_cost cost;
4070   double_int rat;
4071   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4072   enum machine_mode mem_mode = (address_p
4073                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4074                                 : VOIDmode);
4075
4076   *depends_on = NULL;
4077
4078   /* Only consider real candidates.  */
4079   if (!cand->iv)
4080     return infinite_cost;
4081
4082   cbase = cand->iv->base;
4083   cstep = cand->iv->step;
4084   ctype = TREE_TYPE (cbase);
4085
4086   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4087     {
4088       /* We do not have a precision to express the values of use.  */
4089       return infinite_cost;
4090     }
4091
4092   if (address_p
4093       || (use->iv->base_object
4094           && cand->iv->base_object
4095           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4096           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4097     {
4098       /* Do not try to express address of an object with computation based
4099          on address of a different object.  This may cause problems in rtl
4100          level alias analysis (that does not expect this to be happening,
4101          as this is illegal in C), and would be unlikely to be useful
4102          anyway.  */
4103       if (use->iv->base_object
4104           && cand->iv->base_object
4105           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4106         return infinite_cost;
4107     }
4108
4109   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4110     {
4111       /* TODO -- add direct handling of this case.  */
4112       goto fallback;
4113     }
4114
4115   /* CSTEPI is removed from the offset in case statement is after the
4116      increment.  If the step is not constant, we use zero instead.
4117      This is a bit imprecise (there is the extra addition), but
4118      redundancy elimination is likely to transform the code so that
4119      it uses value of the variable before increment anyway,
4120      so it is not that much unrealistic.  */
4121   if (cst_and_fits_in_hwi (cstep))
4122     cstepi = int_cst_value (cstep);
4123   else
4124     cstepi = 0;
4125
4126   if (!constant_multiple_of (ustep, cstep, &rat))
4127     return infinite_cost;
4128
4129   if (rat.fits_shwi ())
4130     ratio = rat.to_shwi ();
4131   else
4132     return infinite_cost;
4133
4134   STRIP_NOPS (cbase);
4135   ctype = TREE_TYPE (cbase);
4136
4137   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4138
4139   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4140      or ratio == 1, it is better to handle this like
4141
4142      ubase - ratio * cbase + ratio * var
4143
4144      (also holds in the case ratio == -1, TODO.  */
4145
4146   if (cst_and_fits_in_hwi (cbase))
4147     {
4148       offset = - ratio * int_cst_value (cbase);
4149       cost = difference_cost (data,
4150                               ubase, build_int_cst (utype, 0),
4151                               &symbol_present, &var_present, &offset,
4152                               depends_on);
4153       cost.cost /= avg_loop_niter (data->current_loop);
4154     }
4155   else if (ratio == 1)
4156     {
4157       tree real_cbase = cbase;
4158
4159       /* Check to see if any adjustment is needed.  */
4160       if (cstepi == 0 && stmt_is_after_inc)
4161         {
4162           aff_tree real_cbase_aff;
4163           aff_tree cstep_aff;
4164
4165           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4166                                    &real_cbase_aff);
4167           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4168
4169           aff_combination_add (&real_cbase_aff, &cstep_aff);
4170           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4171         }
4172
4173       cost = difference_cost (data,
4174                               ubase, real_cbase,
4175                               &symbol_present, &var_present, &offset,
4176                               depends_on);
4177       cost.cost /= avg_loop_niter (data->current_loop);
4178     }
4179   else if (address_p
4180            && !POINTER_TYPE_P (ctype)
4181            && multiplier_allowed_in_address_p
4182                 (ratio, mem_mode,
4183                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4184     {
4185       cbase
4186         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4187       cost = difference_cost (data,
4188                               ubase, cbase,
4189                               &symbol_present, &var_present, &offset,
4190                               depends_on);
4191       cost.cost /= avg_loop_niter (data->current_loop);
4192     }
4193   else
4194     {
4195       cost = force_var_cost (data, cbase, depends_on);
4196       cost = add_costs (cost,
4197                         difference_cost (data,
4198                                          ubase, build_int_cst (utype, 0),
4199                                          &symbol_present, &var_present,
4200                                          &offset, depends_on));
4201       cost.cost /= avg_loop_niter (data->current_loop);
4202       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4203     }
4204
4205   if (inv_expr_id)
4206     {
4207       *inv_expr_id =
4208           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4209       /* Clear depends on.  */
4210       if (*inv_expr_id != -1 && depends_on && *depends_on)
4211         bitmap_clear (*depends_on);
4212     }
4213
4214   /* If we are after the increment, the value of the candidate is higher by
4215      one iteration.  */
4216   if (stmt_is_after_inc)
4217     offset -= ratio * cstepi;
4218
4219   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4220      (symbol/var1/const parts may be omitted).  If we are looking for an
4221      address, find the cost of addressing this.  */
4222   if (address_p)
4223     return add_costs (cost,
4224                       get_address_cost (symbol_present, var_present,
4225                                         offset, ratio, cstepi,
4226                                         mem_mode,
4227                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4228                                         speed, stmt_is_after_inc,
4229                                         can_autoinc));
4230
4231   /* Otherwise estimate the costs for computing the expression.  */
4232   if (!symbol_present && !var_present && !offset)
4233     {
4234       if (ratio != 1)
4235         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4236       return cost;
4237     }
4238
4239   /* Symbol + offset should be compile-time computable so consider that they
4240       are added once to the variable, if present.  */
4241   if (var_present && (symbol_present || offset))
4242     cost.cost += adjust_setup_cost (data,
4243                                     add_cost (speed, TYPE_MODE (ctype)));
4244
4245   /* Having offset does not affect runtime cost in case it is added to
4246      symbol, but it increases complexity.  */
4247   if (offset)
4248     cost.complexity++;
4249
4250   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4251
4252   aratio = ratio > 0 ? ratio : -ratio;
4253   if (aratio != 1)
4254     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4255   return cost;
4256
4257 fallback:
4258   if (can_autoinc)
4259     *can_autoinc = false;
4260
4261   {
4262     /* Just get the expression, expand it and measure the cost.  */
4263     tree comp = get_computation_at (data->current_loop, use, cand, at);
4264
4265     if (!comp)
4266       return infinite_cost;
4267
4268     if (address_p)
4269       comp = build_simple_mem_ref (comp);
4270
4271     return new_cost (computation_cost (comp, speed), 0);
4272   }
4273 }
4274
4275 /* Determines the cost of the computation by that USE is expressed
4276    from induction variable CAND.  If ADDRESS_P is true, we just need
4277    to create an address from it, otherwise we want to get it into
4278    register.  A set of invariants we depend on is stored in
4279    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4280    autoinc addressing is likely.  */
4281
4282 static comp_cost
4283 get_computation_cost (struct ivopts_data *data,
4284                       struct iv_use *use, struct iv_cand *cand,
4285                       bool address_p, bitmap *depends_on,
4286                       bool *can_autoinc, int *inv_expr_id)
4287 {
4288   return get_computation_cost_at (data,
4289                                   use, cand, address_p, depends_on, use->stmt,
4290                                   can_autoinc, inv_expr_id);
4291 }
4292
4293 /* Determines cost of basing replacement of USE on CAND in a generic
4294    expression.  */
4295
4296 static bool
4297 determine_use_iv_cost_generic (struct ivopts_data *data,
4298                                struct iv_use *use, struct iv_cand *cand)
4299 {
4300   bitmap depends_on;
4301   comp_cost cost;
4302   int inv_expr_id = -1;
4303
4304   /* The simple case first -- if we need to express value of the preserved
4305      original biv, the cost is 0.  This also prevents us from counting the
4306      cost of increment twice -- once at this use and once in the cost of
4307      the candidate.  */
4308   if (cand->pos == IP_ORIGINAL
4309       && cand->incremented_at == use->stmt)
4310     {
4311       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4312                        ERROR_MARK, -1);
4313       return true;
4314     }
4315
4316   cost = get_computation_cost (data, use, cand, false, &depends_on,
4317                                NULL, &inv_expr_id);
4318
4319   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4320                    inv_expr_id);
4321
4322   return !infinite_cost_p (cost);
4323 }
4324
4325 /* Determines cost of basing replacement of USE on CAND in an address.  */
4326
4327 static bool
4328 determine_use_iv_cost_address (struct ivopts_data *data,
4329                                struct iv_use *use, struct iv_cand *cand)
4330 {
4331   bitmap depends_on;
4332   bool can_autoinc;
4333   int inv_expr_id = -1;
4334   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4335                                          &can_autoinc, &inv_expr_id);
4336
4337   if (cand->ainc_use == use)
4338     {
4339       if (can_autoinc)
4340         cost.cost -= cand->cost_step;
4341       /* If we generated the candidate solely for exploiting autoincrement
4342          opportunities, and it turns out it can't be used, set the cost to
4343          infinity to make sure we ignore it.  */
4344       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4345         cost = infinite_cost;
4346     }
4347   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4348                    inv_expr_id);
4349
4350   return !infinite_cost_p (cost);
4351 }
4352
4353 /* Computes value of candidate CAND at position AT in iteration NITER, and
4354    stores it to VAL.  */
4355
4356 static void
4357 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4358                aff_tree *val)
4359 {
4360   aff_tree step, delta, nit;
4361   struct iv *iv = cand->iv;
4362   tree type = TREE_TYPE (iv->base);
4363   tree steptype = type;
4364   if (POINTER_TYPE_P (type))
4365     steptype = sizetype;
4366   steptype = unsigned_type_for (type);
4367
4368   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4369   aff_combination_convert (&step, steptype);
4370   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4371   aff_combination_convert (&nit, steptype);
4372   aff_combination_mult (&nit, &step, &delta);
4373   if (stmt_after_increment (loop, cand, at))
4374     aff_combination_add (&delta, &step);
4375
4376   tree_to_aff_combination (iv->base, type, val);
4377   if (!POINTER_TYPE_P (type))
4378     aff_combination_convert (val, steptype);
4379   aff_combination_add (val, &delta);
4380 }
4381
4382 /* Returns period of induction variable iv.  */
4383
4384 static tree
4385 iv_period (struct iv *iv)
4386 {
4387   tree step = iv->step, period, type;
4388   tree pow2div;
4389
4390   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4391
4392   type = unsigned_type_for (TREE_TYPE (step));
4393   /* Period of the iv is lcm (step, type_range)/step -1,
4394      i.e., N*type_range/step - 1. Since type range is power
4395      of two, N == (step >> num_of_ending_zeros_binary (step),
4396      so the final result is
4397
4398        (type_range >> num_of_ending_zeros_binary (step)) - 1
4399
4400   */
4401   pow2div = num_ending_zeros (step);
4402
4403   period = build_low_bits_mask (type,
4404                                 (TYPE_PRECISION (type)
4405                                  - tree_to_uhwi (pow2div)));
4406
4407   return period;
4408 }
4409
4410 /* Returns the comparison operator used when eliminating the iv USE.  */
4411
4412 static enum tree_code
4413 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4414 {
4415   struct loop *loop = data->current_loop;
4416   basic_block ex_bb;
4417   edge exit;
4418
4419   ex_bb = gimple_bb (use->stmt);
4420   exit = EDGE_SUCC (ex_bb, 0);
4421   if (flow_bb_inside_loop_p (loop, exit->dest))
4422     exit = EDGE_SUCC (ex_bb, 1);
4423
4424   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4425 }
4426
4427 static tree
4428 strip_wrap_conserving_type_conversions (tree exp)
4429 {
4430   while (tree_ssa_useless_type_conversion (exp)
4431          && (nowrap_type_p (TREE_TYPE (exp))
4432              == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4433     exp = TREE_OPERAND (exp, 0);
4434   return exp;
4435 }
4436
4437 /* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
4438    check for an exact match.  */
4439
4440 static bool
4441 expr_equal_p (tree e, tree what)
4442 {
4443   gimple stmt;
4444   enum tree_code code;
4445
4446   e = strip_wrap_conserving_type_conversions (e);
4447   what = strip_wrap_conserving_type_conversions (what);
4448
4449   code = TREE_CODE (what);
4450   if (TREE_TYPE (e) != TREE_TYPE (what))
4451     return false;
4452
4453   if (operand_equal_p (e, what, 0))
4454     return true;
4455
4456   if (TREE_CODE (e) != SSA_NAME)
4457     return false;
4458
4459   stmt = SSA_NAME_DEF_STMT (e);
4460   if (gimple_code (stmt) != GIMPLE_ASSIGN
4461       || gimple_assign_rhs_code (stmt) != code)
4462     return false;
4463
4464   switch (get_gimple_rhs_class (code))
4465     {
4466     case GIMPLE_BINARY_RHS:
4467       if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4468         return false;
4469       /* Fallthru.  */
4470
4471     case GIMPLE_UNARY_RHS:
4472     case GIMPLE_SINGLE_RHS:
4473       return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4474     default:
4475       return false;
4476     }
4477 }
4478
4479 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4480    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4481    calculation is performed in non-wrapping type.
4482
4483    TODO: More generally, we could test for the situation that
4484          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4485          This would require knowing the sign of OFFSET.
4486
4487          Also, we only look for the first addition in the computation of BASE.
4488          More complex analysis would be better, but introducing it just for
4489          this optimization seems like an overkill.  */
4490
4491 static bool
4492 difference_cannot_overflow_p (tree base, tree offset)
4493 {
4494   enum tree_code code;
4495   tree e1, e2;
4496
4497   if (!nowrap_type_p (TREE_TYPE (base)))
4498     return false;
4499
4500   base = expand_simple_operations (base);
4501
4502   if (TREE_CODE (base) == SSA_NAME)
4503     {
4504       gimple stmt = SSA_NAME_DEF_STMT (base);
4505
4506       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4507         return false;
4508
4509       code = gimple_assign_rhs_code (stmt);
4510       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4511         return false;
4512
4513       e1 = gimple_assign_rhs1 (stmt);
4514       e2 = gimple_assign_rhs2 (stmt);
4515     }
4516   else
4517     {
4518       code = TREE_CODE (base);
4519       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4520         return false;
4521       e1 = TREE_OPERAND (base, 0);
4522       e2 = TREE_OPERAND (base, 1);
4523     }
4524
4525   /* TODO: deeper inspection may be necessary to prove the equality.  */
4526   switch (code)
4527     {
4528     case PLUS_EXPR:
4529       return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4530     case POINTER_PLUS_EXPR:
4531       return expr_equal_p (e2, offset);
4532
4533     default:
4534       return false;
4535     }
4536 }
4537
4538 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4539    comparison with CAND.  NITER describes the number of iterations of
4540    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4541
4542    We aim to handle the following situation:
4543
4544    sometype *base, *p;
4545    int a, b, i;
4546
4547    i = a;
4548    p = p_0 = base + a;
4549
4550    do
4551      {
4552        bla (*p);
4553        p++;
4554        i++;
4555      }
4556    while (i < b);
4557
4558    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4559    We aim to optimize this to
4560
4561    p = p_0 = base + a;
4562    do
4563      {
4564        bla (*p);
4565        p++;
4566      }
4567    while (p < p_0 - a + b);
4568
4569    This preserves the correctness, since the pointer arithmetics does not
4570    overflow.  More precisely:
4571
4572    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4573       overflow in computing it or the values of p.
4574    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4575       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4576
4577 static bool
4578 iv_elimination_compare_lt (struct ivopts_data *data,
4579                            struct iv_cand *cand, enum tree_code *comp_p,
4580                            struct tree_niter_desc *niter)
4581 {
4582   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4583   struct aff_tree nit, tmpa, tmpb;
4584   enum tree_code comp;
4585   HOST_WIDE_INT step;
4586
4587   /* We need to know that the candidate induction variable does not overflow.
4588      While more complex analysis may be used to prove this, for now just
4589      check that the variable appears in the original program and that it
4590      is computed in a type that guarantees no overflows.  */
4591   cand_type = TREE_TYPE (cand->iv->base);
4592   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4593     return false;
4594
4595   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4596      the calculation of the BOUND could overflow, making the comparison
4597      invalid.  */
4598   if (!data->loop_single_exit_p)
4599     return false;
4600
4601   /* We need to be able to decide whether candidate is increasing or decreasing
4602      in order to choose the right comparison operator.  */
4603   if (!cst_and_fits_in_hwi (cand->iv->step))
4604     return false;
4605   step = int_cst_value (cand->iv->step);
4606
4607   /* Check that the number of iterations matches the expected pattern:
4608      a + 1 > b ? 0 : b - a - 1.  */
4609   mbz = niter->may_be_zero;
4610   if (TREE_CODE (mbz) == GT_EXPR)
4611     {
4612       /* Handle a + 1 > b.  */
4613       tree op0 = TREE_OPERAND (mbz, 0);
4614       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4615         {
4616           a = TREE_OPERAND (op0, 0);
4617           b = TREE_OPERAND (mbz, 1);
4618         }
4619       else
4620         return false;
4621     }
4622   else if (TREE_CODE (mbz) == LT_EXPR)
4623     {
4624       tree op1 = TREE_OPERAND (mbz, 1);
4625
4626       /* Handle b < a + 1.  */
4627       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4628         {
4629           a = TREE_OPERAND (op1, 0);
4630           b = TREE_OPERAND (mbz, 0);
4631         }
4632       else
4633         return false;
4634     }
4635   else
4636     return false;
4637
4638   /* Expected number of iterations is B - A - 1.  Check that it matches
4639      the actual number, i.e., that B - A - NITER = 1.  */
4640   tree_to_aff_combination (niter->niter, nit_type, &nit);
4641   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4642   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4643   aff_combination_scale (&nit, double_int_minus_one);
4644   aff_combination_scale (&tmpa, double_int_minus_one);
4645   aff_combination_add (&tmpb, &tmpa);
4646   aff_combination_add (&tmpb, &nit);
4647   if (tmpb.n != 0 || tmpb.offset != double_int_one)
4648     return false;
4649
4650   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4651      overflow.  */
4652   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4653                         cand->iv->step,
4654                         fold_convert (TREE_TYPE (cand->iv->step), a));
4655   if (!difference_cannot_overflow_p (cand->iv->base, offset))
4656     return false;
4657
4658   /* Determine the new comparison operator.  */
4659   comp = step < 0 ? GT_EXPR : LT_EXPR;
4660   if (*comp_p == NE_EXPR)
4661     *comp_p = comp;
4662   else if (*comp_p == EQ_EXPR)
4663     *comp_p = invert_tree_comparison (comp, false);
4664   else
4665     gcc_unreachable ();
4666
4667   return true;
4668 }
4669
4670 /* Check whether it is possible to express the condition in USE by comparison
4671    of candidate CAND.  If so, store the value compared with to BOUND, and the
4672    comparison operator to COMP.  */
4673
4674 static bool
4675 may_eliminate_iv (struct ivopts_data *data,
4676                   struct iv_use *use, struct iv_cand *cand, tree *bound,
4677                   enum tree_code *comp)
4678 {
4679   basic_block ex_bb;
4680   edge exit;
4681   tree period;
4682   struct loop *loop = data->current_loop;
4683   aff_tree bnd;
4684   struct tree_niter_desc *desc = NULL;
4685
4686   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4687     return false;
4688
4689   /* For now works only for exits that dominate the loop latch.
4690      TODO: extend to other conditions inside loop body.  */
4691   ex_bb = gimple_bb (use->stmt);
4692   if (use->stmt != last_stmt (ex_bb)
4693       || gimple_code (use->stmt) != GIMPLE_COND
4694       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4695     return false;
4696
4697   exit = EDGE_SUCC (ex_bb, 0);
4698   if (flow_bb_inside_loop_p (loop, exit->dest))
4699     exit = EDGE_SUCC (ex_bb, 1);
4700   if (flow_bb_inside_loop_p (loop, exit->dest))
4701     return false;
4702
4703   desc = niter_for_exit (data, exit);
4704   if (!desc)
4705     return false;
4706
4707   /* Determine whether we can use the variable to test the exit condition.
4708      This is the case iff the period of the induction variable is greater
4709      than the number of iterations for which the exit condition is true.  */
4710   period = iv_period (cand->iv);
4711
4712   /* If the number of iterations is constant, compare against it directly.  */
4713   if (TREE_CODE (desc->niter) == INTEGER_CST)
4714     {
4715       /* See cand_value_at.  */
4716       if (stmt_after_increment (loop, cand, use->stmt))
4717         {
4718           if (!tree_int_cst_lt (desc->niter, period))
4719             return false;
4720         }
4721       else
4722         {
4723           if (tree_int_cst_lt (period, desc->niter))
4724             return false;
4725         }
4726     }
4727
4728   /* If not, and if this is the only possible exit of the loop, see whether
4729      we can get a conservative estimate on the number of iterations of the
4730      entire loop and compare against that instead.  */
4731   else
4732     {
4733       double_int period_value, max_niter;
4734
4735       max_niter = desc->max;
4736       if (stmt_after_increment (loop, cand, use->stmt))
4737         max_niter += double_int_one;
4738       period_value = tree_to_double_int (period);
4739       if (max_niter.ugt (period_value))
4740         {
4741           /* See if we can take advantage of inferred loop bound information.  */
4742           if (data->loop_single_exit_p)
4743             {
4744               if (!max_loop_iterations (loop, &max_niter))
4745                 return false;
4746               /* The loop bound is already adjusted by adding 1.  */
4747               if (max_niter.ugt (period_value))
4748                 return false;
4749             }
4750           else
4751             return false;
4752         }
4753     }
4754
4755   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4756
4757   *bound = fold_convert (TREE_TYPE (cand->iv->base),
4758                          aff_combination_to_tree (&bnd));
4759   *comp = iv_elimination_compare (data, use);
4760
4761   /* It is unlikely that computing the number of iterations using division
4762      would be more profitable than keeping the original induction variable.  */
4763   if (expression_expensive_p (*bound))
4764     return false;
4765
4766   /* Sometimes, it is possible to handle the situation that the number of
4767      iterations may be zero unless additional assumtions by using <
4768      instead of != in the exit condition.
4769
4770      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4771            base the exit condition on it.  However, that is often too
4772            expensive.  */
4773   if (!integer_zerop (desc->may_be_zero))
4774     return iv_elimination_compare_lt (data, cand, comp, desc);
4775
4776   return true;
4777 }
4778
4779  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
4780     be copied, if is is used in the loop body and DATA->body_includes_call.  */
4781
4782 static int
4783 parm_decl_cost (struct ivopts_data *data, tree bound)
4784 {
4785   tree sbound = bound;
4786   STRIP_NOPS (sbound);
4787
4788   if (TREE_CODE (sbound) == SSA_NAME
4789       && SSA_NAME_IS_DEFAULT_DEF (sbound)
4790       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4791       && data->body_includes_call)
4792     return COSTS_N_INSNS (1);
4793
4794   return 0;
4795 }
4796
4797 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4798
4799 static bool
4800 determine_use_iv_cost_condition (struct ivopts_data *data,
4801                                  struct iv_use *use, struct iv_cand *cand)
4802 {
4803   tree bound = NULL_TREE;
4804   struct iv *cmp_iv;
4805   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4806   comp_cost elim_cost, express_cost, cost, bound_cost;
4807   bool ok;
4808   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4809   tree *control_var, *bound_cst;
4810   enum tree_code comp = ERROR_MARK;
4811
4812   /* Only consider real candidates.  */
4813   if (!cand->iv)
4814     {
4815       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4816                        ERROR_MARK, -1);
4817       return false;
4818     }
4819
4820   /* Try iv elimination.  */
4821   if (may_eliminate_iv (data, use, cand, &bound, &comp))
4822     {
4823       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4824       if (elim_cost.cost == 0)
4825         elim_cost.cost = parm_decl_cost (data, bound);
4826       else if (TREE_CODE (bound) == INTEGER_CST)
4827         elim_cost.cost = 0;
4828       /* If we replace a loop condition 'i < n' with 'p < base + n',
4829          depends_on_elim will have 'base' and 'n' set, which implies
4830          that both 'base' and 'n' will be live during the loop.  More likely,
4831          'base + n' will be loop invariant, resulting in only one live value
4832          during the loop.  So in that case we clear depends_on_elim and set
4833         elim_inv_expr_id instead.  */
4834       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4835         {
4836           elim_inv_expr_id = get_expr_id (data, bound);
4837           bitmap_clear (depends_on_elim);
4838         }
4839       /* The bound is a loop invariant, so it will be only computed
4840          once.  */
4841       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4842     }
4843   else
4844     elim_cost = infinite_cost;
4845
4846   /* Try expressing the original giv.  If it is compared with an invariant,
4847      note that we cannot get rid of it.  */
4848   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4849                               NULL, &cmp_iv);
4850   gcc_assert (ok);
4851
4852   /* When the condition is a comparison of the candidate IV against
4853      zero, prefer this IV.
4854
4855      TODO: The constant that we're subtracting from the cost should
4856      be target-dependent.  This information should be added to the
4857      target costs for each backend.  */
4858   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4859       && integer_zerop (*bound_cst)
4860       && (operand_equal_p (*control_var, cand->var_after, 0)
4861           || operand_equal_p (*control_var, cand->var_before, 0)))
4862     elim_cost.cost -= 1;
4863
4864   express_cost = get_computation_cost (data, use, cand, false,
4865                                        &depends_on_express, NULL,
4866                                        &express_inv_expr_id);
4867   fd_ivopts_data = data;
4868   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4869
4870   /* Count the cost of the original bound as well.  */
4871   bound_cost = force_var_cost (data, *bound_cst, NULL);
4872   if (bound_cost.cost == 0)
4873     bound_cost.cost = parm_decl_cost (data, *bound_cst);
4874   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4875     bound_cost.cost = 0;
4876   express_cost.cost += bound_cost.cost;
4877
4878   /* Choose the better approach, preferring the eliminated IV. */
4879   if (compare_costs (elim_cost, express_cost) <= 0)
4880     {
4881       cost = elim_cost;
4882       depends_on = depends_on_elim;
4883       depends_on_elim = NULL;
4884       inv_expr_id = elim_inv_expr_id;
4885     }
4886   else
4887     {
4888       cost = express_cost;
4889       depends_on = depends_on_express;
4890       depends_on_express = NULL;
4891       bound = NULL_TREE;
4892       comp = ERROR_MARK;
4893       inv_expr_id = express_inv_expr_id;
4894     }
4895
4896   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4897
4898   if (depends_on_elim)
4899     BITMAP_FREE (depends_on_elim);
4900   if (depends_on_express)
4901     BITMAP_FREE (depends_on_express);
4902
4903   return !infinite_cost_p (cost);
4904 }
4905
4906 /* Determines cost of basing replacement of USE on CAND.  Returns false
4907    if USE cannot be based on CAND.  */
4908
4909 static bool
4910 determine_use_iv_cost (struct ivopts_data *data,
4911                        struct iv_use *use, struct iv_cand *cand)
4912 {
4913   switch (use->type)
4914     {
4915     case USE_NONLINEAR_EXPR:
4916       return determine_use_iv_cost_generic (data, use, cand);
4917
4918     case USE_ADDRESS:
4919       return determine_use_iv_cost_address (data, use, cand);
4920
4921     case USE_COMPARE:
4922       return determine_use_iv_cost_condition (data, use, cand);
4923
4924     default:
4925       gcc_unreachable ();
4926     }
4927 }
4928
4929 /* Return true if get_computation_cost indicates that autoincrement is
4930    a possibility for the pair of USE and CAND, false otherwise.  */
4931
4932 static bool
4933 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4934                            struct iv_cand *cand)
4935 {
4936   bitmap depends_on;
4937   bool can_autoinc;
4938   comp_cost cost;
4939
4940   if (use->type != USE_ADDRESS)
4941     return false;
4942
4943   cost = get_computation_cost (data, use, cand, true, &depends_on,
4944                                &can_autoinc, NULL);
4945
4946   BITMAP_FREE (depends_on);
4947
4948   return !infinite_cost_p (cost) && can_autoinc;
4949 }
4950
4951 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4952    use that allows autoincrement, and set their AINC_USE if possible.  */
4953
4954 static void
4955 set_autoinc_for_original_candidates (struct ivopts_data *data)
4956 {
4957   unsigned i, j;
4958
4959   for (i = 0; i < n_iv_cands (data); i++)
4960     {
4961       struct iv_cand *cand = iv_cand (data, i);
4962       struct iv_use *closest_before = NULL;
4963       struct iv_use *closest_after = NULL;
4964       if (cand->pos != IP_ORIGINAL)
4965         continue;
4966
4967       for (j = 0; j < n_iv_uses (data); j++)
4968         {
4969           struct iv_use *use = iv_use (data, j);
4970           unsigned uid = gimple_uid (use->stmt);
4971
4972           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4973             continue;
4974
4975           if (uid < gimple_uid (cand->incremented_at)
4976               && (closest_before == NULL
4977                   || uid > gimple_uid (closest_before->stmt)))
4978             closest_before = use;
4979
4980           if (uid > gimple_uid (cand->incremented_at)
4981               && (closest_after == NULL
4982                   || uid < gimple_uid (closest_after->stmt)))
4983             closest_after = use;
4984         }
4985
4986       if (closest_before != NULL
4987           && autoinc_possible_for_pair (data, closest_before, cand))
4988         cand->ainc_use = closest_before;
4989       else if (closest_after != NULL
4990                && autoinc_possible_for_pair (data, closest_after, cand))
4991         cand->ainc_use = closest_after;
4992     }
4993 }
4994
4995 /* Finds the candidates for the induction variables.  */
4996
4997 static void
4998 find_iv_candidates (struct ivopts_data *data)
4999 {
5000   /* Add commonly used ivs.  */
5001   add_standard_iv_candidates (data);
5002
5003   /* Add old induction variables.  */
5004   add_old_ivs_candidates (data);
5005
5006   /* Add induction variables derived from uses.  */
5007   add_derived_ivs_candidates (data);
5008
5009   set_autoinc_for_original_candidates (data);
5010
5011   /* Record the important candidates.  */
5012   record_important_candidates (data);
5013 }
5014
5015 /* Determines costs of basing the use of the iv on an iv candidate.  */
5016
5017 static void
5018 determine_use_iv_costs (struct ivopts_data *data)
5019 {
5020   unsigned i, j;
5021   struct iv_use *use;
5022   struct iv_cand *cand;
5023   bitmap to_clear = BITMAP_ALLOC (NULL);
5024
5025   alloc_use_cost_map (data);
5026
5027   for (i = 0; i < n_iv_uses (data); i++)
5028     {
5029       use = iv_use (data, i);
5030
5031       if (data->consider_all_candidates)
5032         {
5033           for (j = 0; j < n_iv_cands (data); j++)
5034             {
5035               cand = iv_cand (data, j);
5036               determine_use_iv_cost (data, use, cand);
5037             }
5038         }
5039       else
5040         {
5041           bitmap_iterator bi;
5042
5043           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5044             {
5045               cand = iv_cand (data, j);
5046               if (!determine_use_iv_cost (data, use, cand))
5047                 bitmap_set_bit (to_clear, j);
5048             }
5049
5050           /* Remove the candidates for that the cost is infinite from
5051              the list of related candidates.  */
5052           bitmap_and_compl_into (use->related_cands, to_clear);
5053           bitmap_clear (to_clear);
5054         }
5055     }
5056
5057   BITMAP_FREE (to_clear);
5058
5059   if (dump_file && (dump_flags & TDF_DETAILS))
5060     {
5061       fprintf (dump_file, "Use-candidate costs:\n");
5062
5063       for (i = 0; i < n_iv_uses (data); i++)
5064         {
5065           use = iv_use (data, i);
5066
5067           fprintf (dump_file, "Use %d:\n", i);
5068           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
5069           for (j = 0; j < use->n_map_members; j++)
5070             {
5071               if (!use->cost_map[j].cand
5072                   || infinite_cost_p (use->cost_map[j].cost))
5073                 continue;
5074
5075               fprintf (dump_file, "  %d\t%d\t%d\t",
5076                        use->cost_map[j].cand->id,
5077                        use->cost_map[j].cost.cost,
5078                        use->cost_map[j].cost.complexity);
5079               if (use->cost_map[j].depends_on)
5080                 bitmap_print (dump_file,
5081                               use->cost_map[j].depends_on, "","");
5082               if (use->cost_map[j].inv_expr_id != -1)
5083                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5084               fprintf (dump_file, "\n");
5085             }
5086
5087           fprintf (dump_file, "\n");
5088         }
5089       fprintf (dump_file, "\n");
5090     }
5091 }
5092
5093 /* Determines cost of the candidate CAND.  */
5094
5095 static void
5096 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5097 {
5098   comp_cost cost_base;
5099   unsigned cost, cost_step;
5100   tree base;
5101
5102   if (!cand->iv)
5103     {
5104       cand->cost = 0;
5105       return;
5106     }
5107
5108   /* There are two costs associated with the candidate -- its increment
5109      and its initialization.  The second is almost negligible for any loop
5110      that rolls enough, so we take it just very little into account.  */
5111
5112   base = cand->iv->base;
5113   cost_base = force_var_cost (data, base, NULL);
5114   /* It will be exceptional that the iv register happens to be initialized with
5115      the proper value at no cost.  In general, there will at least be a regcopy
5116      or a const set.  */
5117   if (cost_base.cost == 0)
5118     cost_base.cost = COSTS_N_INSNS (1);
5119   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5120
5121   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5122
5123   /* Prefer the original ivs unless we may gain something by replacing it.
5124      The reason is to make debugging simpler; so this is not relevant for
5125      artificial ivs created by other optimization passes.  */
5126   if (cand->pos != IP_ORIGINAL
5127       || !SSA_NAME_VAR (cand->var_before)
5128       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5129     cost++;
5130
5131   /* Prefer not to insert statements into latch unless there are some
5132      already (so that we do not create unnecessary jumps).  */
5133   if (cand->pos == IP_END
5134       && empty_block_p (ip_end_pos (data->current_loop)))
5135     cost++;
5136
5137   cand->cost = cost;
5138   cand->cost_step = cost_step;
5139 }
5140
5141 /* Determines costs of computation of the candidates.  */
5142
5143 static void
5144 determine_iv_costs (struct ivopts_data *data)
5145 {
5146   unsigned i;
5147
5148   if (dump_file && (dump_flags & TDF_DETAILS))
5149     {
5150       fprintf (dump_file, "Candidate costs:\n");
5151       fprintf (dump_file, "  cand\tcost\n");
5152     }
5153
5154   for (i = 0; i < n_iv_cands (data); i++)
5155     {
5156       struct iv_cand *cand = iv_cand (data, i);
5157
5158       determine_iv_cost (data, cand);
5159
5160       if (dump_file && (dump_flags & TDF_DETAILS))
5161         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5162     }
5163
5164   if (dump_file && (dump_flags & TDF_DETAILS))
5165     fprintf (dump_file, "\n");
5166 }
5167
5168 /* Calculates cost for having SIZE induction variables.  */
5169
5170 static unsigned
5171 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5172 {
5173   /* We add size to the cost, so that we prefer eliminating ivs
5174      if possible.  */
5175   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5176                                             data->body_includes_call);
5177 }
5178
5179 /* For each size of the induction variable set determine the penalty.  */
5180
5181 static void
5182 determine_set_costs (struct ivopts_data *data)
5183 {
5184   unsigned j, n;
5185   gimple phi;
5186   gimple_stmt_iterator psi;
5187   tree op;
5188   struct loop *loop = data->current_loop;
5189   bitmap_iterator bi;
5190
5191   if (dump_file && (dump_flags & TDF_DETAILS))
5192     {
5193       fprintf (dump_file, "Global costs:\n");
5194       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5195       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5196       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5197       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5198     }
5199
5200   n = 0;
5201   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5202     {
5203       phi = gsi_stmt (psi);
5204       op = PHI_RESULT (phi);
5205
5206       if (virtual_operand_p (op))
5207         continue;
5208
5209       if (get_iv (data, op))
5210         continue;
5211
5212       n++;
5213     }
5214
5215   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5216     {
5217       struct version_info *info = ver_info (data, j);
5218
5219       if (info->inv_id && info->has_nonlin_use)
5220         n++;
5221     }
5222
5223   data->regs_used = n;
5224   if (dump_file && (dump_flags & TDF_DETAILS))
5225     fprintf (dump_file, "  regs_used %d\n", n);
5226
5227   if (dump_file && (dump_flags & TDF_DETAILS))
5228     {
5229       fprintf (dump_file, "  cost for size:\n");
5230       fprintf (dump_file, "  ivs\tcost\n");
5231       for (j = 0; j <= 2 * target_avail_regs; j++)
5232         fprintf (dump_file, "  %d\t%d\n", j,
5233                  ivopts_global_cost_for_size (data, j));
5234       fprintf (dump_file, "\n");
5235     }
5236 }
5237
5238 /* Returns true if A is a cheaper cost pair than B.  */
5239
5240 static bool
5241 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5242 {
5243   int cmp;
5244
5245   if (!a)
5246     return false;
5247
5248   if (!b)
5249     return true;
5250
5251   cmp = compare_costs (a->cost, b->cost);
5252   if (cmp < 0)
5253     return true;
5254
5255   if (cmp > 0)
5256     return false;
5257
5258   /* In case the costs are the same, prefer the cheaper candidate.  */
5259   if (a->cand->cost < b->cand->cost)
5260     return true;
5261
5262   return false;
5263 }
5264
5265
5266 /* Returns candidate by that USE is expressed in IVS.  */
5267
5268 static struct cost_pair *
5269 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5270 {
5271   return ivs->cand_for_use[use->id];
5272 }
5273
5274 /* Computes the cost field of IVS structure.  */
5275
5276 static void
5277 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5278 {
5279   comp_cost cost = ivs->cand_use_cost;
5280
5281   cost.cost += ivs->cand_cost;
5282
5283   cost.cost += ivopts_global_cost_for_size (data,
5284                                             ivs->n_regs + ivs->num_used_inv_expr);
5285
5286   ivs->cost = cost;
5287 }
5288
5289 /* Remove invariants in set INVS to set IVS.  */
5290
5291 static void
5292 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5293 {
5294   bitmap_iterator bi;
5295   unsigned iid;
5296
5297   if (!invs)
5298     return;
5299
5300   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5301     {
5302       ivs->n_invariant_uses[iid]--;
5303       if (ivs->n_invariant_uses[iid] == 0)
5304         ivs->n_regs--;
5305     }
5306 }
5307
5308 /* Set USE not to be expressed by any candidate in IVS.  */
5309
5310 static void
5311 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5312                  struct iv_use *use)
5313 {
5314   unsigned uid = use->id, cid;
5315   struct cost_pair *cp;
5316
5317   cp = ivs->cand_for_use[uid];
5318   if (!cp)
5319     return;
5320   cid = cp->cand->id;
5321
5322   ivs->bad_uses++;
5323   ivs->cand_for_use[uid] = NULL;
5324   ivs->n_cand_uses[cid]--;
5325
5326   if (ivs->n_cand_uses[cid] == 0)
5327     {
5328       bitmap_clear_bit (ivs->cands, cid);
5329       /* Do not count the pseudocandidates.  */
5330       if (cp->cand->iv)
5331         ivs->n_regs--;
5332       ivs->n_cands--;
5333       ivs->cand_cost -= cp->cand->cost;
5334
5335       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5336     }
5337
5338   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5339
5340   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5341
5342   if (cp->inv_expr_id != -1)
5343     {
5344       ivs->used_inv_expr[cp->inv_expr_id]--;
5345       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5346         ivs->num_used_inv_expr--;
5347     }
5348   iv_ca_recount_cost (data, ivs);
5349 }
5350
5351 /* Add invariants in set INVS to set IVS.  */
5352
5353 static void
5354 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5355 {
5356   bitmap_iterator bi;
5357   unsigned iid;
5358
5359   if (!invs)
5360     return;
5361
5362   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5363     {
5364       ivs->n_invariant_uses[iid]++;
5365       if (ivs->n_invariant_uses[iid] == 1)
5366         ivs->n_regs++;
5367     }
5368 }
5369
5370 /* Set cost pair for USE in set IVS to CP.  */
5371
5372 static void
5373 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5374               struct iv_use *use, struct cost_pair *cp)
5375 {
5376   unsigned uid = use->id, cid;
5377
5378   if (ivs->cand_for_use[uid] == cp)
5379     return;
5380
5381   if (ivs->cand_for_use[uid])
5382     iv_ca_set_no_cp (data, ivs, use);
5383
5384   if (cp)
5385     {
5386       cid = cp->cand->id;
5387
5388       ivs->bad_uses--;
5389       ivs->cand_for_use[uid] = cp;
5390       ivs->n_cand_uses[cid]++;
5391       if (ivs->n_cand_uses[cid] == 1)
5392         {
5393           bitmap_set_bit (ivs->cands, cid);
5394           /* Do not count the pseudocandidates.  */
5395           if (cp->cand->iv)
5396             ivs->n_regs++;
5397           ivs->n_cands++;
5398           ivs->cand_cost += cp->cand->cost;
5399
5400           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5401         }
5402
5403       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5404       iv_ca_set_add_invariants (ivs, cp->depends_on);
5405
5406       if (cp->inv_expr_id != -1)
5407         {
5408           ivs->used_inv_expr[cp->inv_expr_id]++;
5409           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5410             ivs->num_used_inv_expr++;
5411         }
5412       iv_ca_recount_cost (data, ivs);
5413     }
5414 }
5415
5416 /* Extend set IVS by expressing USE by some of the candidates in it
5417    if possible. All important candidates will be considered
5418    if IMPORTANT_CANDIDATES is true.  */
5419
5420 static void
5421 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5422                struct iv_use *use, bool important_candidates)
5423 {
5424   struct cost_pair *best_cp = NULL, *cp;
5425   bitmap_iterator bi;
5426   bitmap cands;
5427   unsigned i;
5428
5429   gcc_assert (ivs->upto >= use->id);
5430
5431   if (ivs->upto == use->id)
5432     {
5433       ivs->upto++;
5434       ivs->bad_uses++;
5435     }
5436
5437   cands = (important_candidates ? data->important_candidates : ivs->cands);
5438   EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5439     {
5440       struct iv_cand *cand = iv_cand (data, i);
5441
5442       cp = get_use_iv_cost (data, use, cand);
5443
5444       if (cheaper_cost_pair (cp, best_cp))
5445         best_cp = cp;
5446     }
5447
5448   iv_ca_set_cp (data, ivs, use, best_cp);
5449 }
5450
5451 /* Get cost for assignment IVS.  */
5452
5453 static comp_cost
5454 iv_ca_cost (struct iv_ca *ivs)
5455 {
5456   /* This was a conditional expression but it triggered a bug in
5457      Sun C 5.5.  */
5458   if (ivs->bad_uses)
5459     return infinite_cost;
5460   else
5461     return ivs->cost;
5462 }
5463
5464 /* Returns true if all dependences of CP are among invariants in IVS.  */
5465
5466 static bool
5467 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5468 {
5469   unsigned i;
5470   bitmap_iterator bi;
5471
5472   if (!cp->depends_on)
5473     return true;
5474
5475   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5476     {
5477       if (ivs->n_invariant_uses[i] == 0)
5478         return false;
5479     }
5480
5481   return true;
5482 }
5483
5484 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5485    it before NEXT_CHANGE.  */
5486
5487 static struct iv_ca_delta *
5488 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5489                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5490 {
5491   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5492
5493   change->use = use;
5494   change->old_cp = old_cp;
5495   change->new_cp = new_cp;
5496   change->next_change = next_change;
5497
5498   return change;
5499 }
5500
5501 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5502    are rewritten.  */
5503
5504 static struct iv_ca_delta *
5505 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5506 {
5507   struct iv_ca_delta *last;
5508
5509   if (!l2)
5510     return l1;
5511
5512   if (!l1)
5513     return l2;
5514
5515   for (last = l1; last->next_change; last = last->next_change)
5516     continue;
5517   last->next_change = l2;
5518
5519   return l1;
5520 }
5521
5522 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5523
5524 static struct iv_ca_delta *
5525 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5526 {
5527   struct iv_ca_delta *act, *next, *prev = NULL;
5528   struct cost_pair *tmp;
5529
5530   for (act = delta; act; act = next)
5531     {
5532       next = act->next_change;
5533       act->next_change = prev;
5534       prev = act;
5535
5536       tmp = act->old_cp;
5537       act->old_cp = act->new_cp;
5538       act->new_cp = tmp;
5539     }
5540
5541   return prev;
5542 }
5543
5544 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5545    reverted instead.  */
5546
5547 static void
5548 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5549                     struct iv_ca_delta *delta, bool forward)
5550 {
5551   struct cost_pair *from, *to;
5552   struct iv_ca_delta *act;
5553
5554   if (!forward)
5555     delta = iv_ca_delta_reverse (delta);
5556
5557   for (act = delta; act; act = act->next_change)
5558     {
5559       from = act->old_cp;
5560       to = act->new_cp;
5561       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5562       iv_ca_set_cp (data, ivs, act->use, to);
5563     }
5564
5565   if (!forward)
5566     iv_ca_delta_reverse (delta);
5567 }
5568
5569 /* Returns true if CAND is used in IVS.  */
5570
5571 static bool
5572 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5573 {
5574   return ivs->n_cand_uses[cand->id] > 0;
5575 }
5576
5577 /* Returns number of induction variable candidates in the set IVS.  */
5578
5579 static unsigned
5580 iv_ca_n_cands (struct iv_ca *ivs)
5581 {
5582   return ivs->n_cands;
5583 }
5584
5585 /* Free the list of changes DELTA.  */
5586
5587 static void
5588 iv_ca_delta_free (struct iv_ca_delta **delta)
5589 {
5590   struct iv_ca_delta *act, *next;
5591
5592   for (act = *delta; act; act = next)
5593     {
5594       next = act->next_change;
5595       free (act);
5596     }
5597
5598   *delta = NULL;
5599 }
5600
5601 /* Allocates new iv candidates assignment.  */
5602
5603 static struct iv_ca *
5604 iv_ca_new (struct ivopts_data *data)
5605 {
5606   struct iv_ca *nw = XNEW (struct iv_ca);
5607
5608   nw->upto = 0;
5609   nw->bad_uses = 0;
5610   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5611   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5612   nw->cands = BITMAP_ALLOC (NULL);
5613   nw->n_cands = 0;
5614   nw->n_regs = 0;
5615   nw->cand_use_cost = no_cost;
5616   nw->cand_cost = 0;
5617   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5618   nw->cost = no_cost;
5619   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5620   nw->num_used_inv_expr = 0;
5621
5622   return nw;
5623 }
5624
5625 /* Free memory occupied by the set IVS.  */
5626
5627 static void
5628 iv_ca_free (struct iv_ca **ivs)
5629 {
5630   free ((*ivs)->cand_for_use);
5631   free ((*ivs)->n_cand_uses);
5632   BITMAP_FREE ((*ivs)->cands);
5633   free ((*ivs)->n_invariant_uses);
5634   free ((*ivs)->used_inv_expr);
5635   free (*ivs);
5636   *ivs = NULL;
5637 }
5638
5639 /* Dumps IVS to FILE.  */
5640
5641 static void
5642 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5643 {
5644   const char *pref = "  invariants ";
5645   unsigned i;
5646   comp_cost cost = iv_ca_cost (ivs);
5647
5648   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5649   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5650            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5651   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5652
5653    for (i = 0; i < ivs->upto; i++)
5654     {
5655       struct iv_use *use = iv_use (data, i);
5656       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5657       if (cp)
5658         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5659                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5660       else
5661         fprintf (file, "   use:%d --> ??\n", use->id);
5662     }
5663
5664   for (i = 1; i <= data->max_inv_id; i++)
5665     if (ivs->n_invariant_uses[i])
5666       {
5667         fprintf (file, "%s%d", pref, i);
5668         pref = ", ";
5669       }
5670   fprintf (file, "\n\n");
5671 }
5672
5673 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5674    new set, and store differences in DELTA.  Number of induction variables
5675    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5676    the function will try to find a solution with mimimal iv candidates.  */
5677
5678 static comp_cost
5679 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5680               struct iv_cand *cand, struct iv_ca_delta **delta,
5681               unsigned *n_ivs, bool min_ncand)
5682 {
5683   unsigned i;
5684   comp_cost cost;
5685   struct iv_use *use;
5686   struct cost_pair *old_cp, *new_cp;
5687
5688   *delta = NULL;
5689   for (i = 0; i < ivs->upto; i++)
5690     {
5691       use = iv_use (data, i);
5692       old_cp = iv_ca_cand_for_use (ivs, use);
5693
5694       if (old_cp
5695           && old_cp->cand == cand)
5696         continue;
5697
5698       new_cp = get_use_iv_cost (data, use, cand);
5699       if (!new_cp)
5700         continue;
5701
5702       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5703         continue;
5704
5705       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5706         continue;
5707
5708       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5709     }
5710
5711   iv_ca_delta_commit (data, ivs, *delta, true);
5712   cost = iv_ca_cost (ivs);
5713   if (n_ivs)
5714     *n_ivs = iv_ca_n_cands (ivs);
5715   iv_ca_delta_commit (data, ivs, *delta, false);
5716
5717   return cost;
5718 }
5719
5720 /* Try narrowing set IVS by removing CAND.  Return the cost of
5721    the new set and store the differences in DELTA.  START is
5722    the candidate with which we start narrowing.  */
5723
5724 static comp_cost
5725 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5726               struct iv_cand *cand, struct iv_cand *start,
5727               struct iv_ca_delta **delta)
5728 {
5729   unsigned i, ci;
5730   struct iv_use *use;
5731   struct cost_pair *old_cp, *new_cp, *cp;
5732   bitmap_iterator bi;
5733   struct iv_cand *cnd;
5734   comp_cost cost, best_cost, acost;
5735
5736   *delta = NULL;
5737   for (i = 0; i < n_iv_uses (data); i++)
5738     {
5739       use = iv_use (data, i);
5740
5741       old_cp = iv_ca_cand_for_use (ivs, use);
5742       if (old_cp->cand != cand)
5743         continue;
5744
5745       best_cost = iv_ca_cost (ivs);
5746       /* Start narrowing with START.  */
5747       new_cp = get_use_iv_cost (data, use, start);
5748
5749       if (data->consider_all_candidates)
5750         {
5751           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5752             {
5753               if (ci == cand->id || (start && ci == start->id))
5754                 continue;
5755
5756               cnd = iv_cand (data, ci);
5757
5758               cp = get_use_iv_cost (data, use, cnd);
5759               if (!cp)
5760                 continue;
5761
5762               iv_ca_set_cp (data, ivs, use, cp);
5763               acost = iv_ca_cost (ivs);
5764
5765               if (compare_costs (acost, best_cost) < 0)
5766                 {
5767                   best_cost = acost;
5768                   new_cp = cp;
5769                 }
5770             }
5771         }
5772       else
5773         {
5774           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5775             {
5776               if (ci == cand->id || (start && ci == start->id))
5777                 continue;
5778
5779               cnd = iv_cand (data, ci);
5780
5781               cp = get_use_iv_cost (data, use, cnd);
5782               if (!cp)
5783                 continue;
5784
5785               iv_ca_set_cp (data, ivs, use, cp);
5786               acost = iv_ca_cost (ivs);
5787
5788               if (compare_costs (acost, best_cost) < 0)
5789                 {
5790                   best_cost = acost;
5791                   new_cp = cp;
5792                 }
5793             }
5794         }
5795       /* Restore to old cp for use.  */
5796       iv_ca_set_cp (data, ivs, use, old_cp);
5797
5798       if (!new_cp)
5799         {
5800           iv_ca_delta_free (delta);
5801           return infinite_cost;
5802         }
5803
5804       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5805     }
5806
5807   iv_ca_delta_commit (data, ivs, *delta, true);
5808   cost = iv_ca_cost (ivs);
5809   iv_ca_delta_commit (data, ivs, *delta, false);
5810
5811   return cost;
5812 }
5813
5814 /* Try optimizing the set of candidates IVS by removing candidates different
5815    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5816    differences in DELTA.  */
5817
5818 static comp_cost
5819 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5820              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5821 {
5822   bitmap_iterator bi;
5823   struct iv_ca_delta *act_delta, *best_delta;
5824   unsigned i;
5825   comp_cost best_cost, acost;
5826   struct iv_cand *cand;
5827
5828   best_delta = NULL;
5829   best_cost = iv_ca_cost (ivs);
5830
5831   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5832     {
5833       cand = iv_cand (data, i);
5834
5835       if (cand == except_cand)
5836         continue;
5837
5838       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5839
5840       if (compare_costs (acost, best_cost) < 0)
5841         {
5842           best_cost = acost;
5843           iv_ca_delta_free (&best_delta);
5844           best_delta = act_delta;
5845         }
5846       else
5847         iv_ca_delta_free (&act_delta);
5848     }
5849
5850   if (!best_delta)
5851     {
5852       *delta = NULL;
5853       return best_cost;
5854     }
5855
5856   /* Recurse to possibly remove other unnecessary ivs.  */
5857   iv_ca_delta_commit (data, ivs, best_delta, true);
5858   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5859   iv_ca_delta_commit (data, ivs, best_delta, false);
5860   *delta = iv_ca_delta_join (best_delta, *delta);
5861   return best_cost;
5862 }
5863
5864 /* Tries to extend the sets IVS in the best possible way in order
5865    to express the USE.  If ORIGINALP is true, prefer candidates from
5866    the original set of IVs, otherwise favor important candidates not
5867    based on any memory object.  */
5868
5869 static bool
5870 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5871                   struct iv_use *use, bool originalp)
5872 {
5873   comp_cost best_cost, act_cost;
5874   unsigned i;
5875   bitmap_iterator bi;
5876   struct iv_cand *cand;
5877   struct iv_ca_delta *best_delta = NULL, *act_delta;
5878   struct cost_pair *cp;
5879
5880   iv_ca_add_use (data, ivs, use, false);
5881   best_cost = iv_ca_cost (ivs);
5882
5883   cp = iv_ca_cand_for_use (ivs, use);
5884   if (!cp)
5885     {
5886       ivs->upto--;
5887       ivs->bad_uses--;
5888       iv_ca_add_use (data, ivs, use, true);
5889       best_cost = iv_ca_cost (ivs);
5890       cp = iv_ca_cand_for_use (ivs, use);
5891     }
5892   if (cp)
5893     {
5894       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5895       iv_ca_set_no_cp (data, ivs, use);
5896     }
5897
5898   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
5899      first try important candidates not based on any memory object.  Only if
5900      this fails, try the specific ones.  Rationale -- in loops with many
5901      variables the best choice often is to use just one generic biv.  If we
5902      added here many ivs specific to the uses, the optimization algorithm later
5903      would be likely to get stuck in a local minimum, thus causing us to create
5904      too many ivs.  The approach from few ivs to more seems more likely to be
5905      successful -- starting from few ivs, replacing an expensive use by a
5906      specific iv should always be a win.  */
5907   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5908     {
5909       cand = iv_cand (data, i);
5910
5911       if (originalp && cand->pos !=IP_ORIGINAL)
5912         continue;
5913
5914       if (!originalp && cand->iv->base_object != NULL_TREE)
5915         continue;
5916
5917       if (iv_ca_cand_used_p (ivs, cand))
5918         continue;
5919
5920       cp = get_use_iv_cost (data, use, cand);
5921       if (!cp)
5922         continue;
5923
5924       iv_ca_set_cp (data, ivs, use, cp);
5925       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5926                                true);
5927       iv_ca_set_no_cp (data, ivs, use);
5928       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5929
5930       if (compare_costs (act_cost, best_cost) < 0)
5931         {
5932           best_cost = act_cost;
5933
5934           iv_ca_delta_free (&best_delta);
5935           best_delta = act_delta;
5936         }
5937       else
5938         iv_ca_delta_free (&act_delta);
5939     }
5940
5941   if (infinite_cost_p (best_cost))
5942     {
5943       for (i = 0; i < use->n_map_members; i++)
5944         {
5945           cp = use->cost_map + i;
5946           cand = cp->cand;
5947           if (!cand)
5948             continue;
5949
5950           /* Already tried this.  */
5951           if (cand->important)
5952             {
5953               if (originalp && cand->pos == IP_ORIGINAL)
5954                 continue;
5955               if (!originalp && cand->iv->base_object == NULL_TREE)
5956                 continue;
5957             }
5958
5959           if (iv_ca_cand_used_p (ivs, cand))
5960             continue;
5961
5962           act_delta = NULL;
5963           iv_ca_set_cp (data, ivs, use, cp);
5964           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5965           iv_ca_set_no_cp (data, ivs, use);
5966           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5967                                        cp, act_delta);
5968
5969           if (compare_costs (act_cost, best_cost) < 0)
5970             {
5971               best_cost = act_cost;
5972
5973               if (best_delta)
5974                 iv_ca_delta_free (&best_delta);
5975               best_delta = act_delta;
5976             }
5977           else
5978             iv_ca_delta_free (&act_delta);
5979         }
5980     }
5981
5982   iv_ca_delta_commit (data, ivs, best_delta, true);
5983   iv_ca_delta_free (&best_delta);
5984
5985   return !infinite_cost_p (best_cost);
5986 }
5987
5988 /* Finds an initial assignment of candidates to uses.  */
5989
5990 static struct iv_ca *
5991 get_initial_solution (struct ivopts_data *data, bool originalp)
5992 {
5993   struct iv_ca *ivs = iv_ca_new (data);
5994   unsigned i;
5995
5996   for (i = 0; i < n_iv_uses (data); i++)
5997     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5998       {
5999         iv_ca_free (&ivs);
6000         return NULL;
6001       }
6002
6003   return ivs;
6004 }
6005
6006 /* Tries to improve set of induction variables IVS.  */
6007
6008 static bool
6009 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6010 {
6011   unsigned i, n_ivs;
6012   comp_cost acost, best_cost = iv_ca_cost (ivs);
6013   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6014   struct iv_cand *cand;
6015
6016   /* Try extending the set of induction variables by one.  */
6017   for (i = 0; i < n_iv_cands (data); i++)
6018     {
6019       cand = iv_cand (data, i);
6020
6021       if (iv_ca_cand_used_p (ivs, cand))
6022         continue;
6023
6024       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6025       if (!act_delta)
6026         continue;
6027
6028       /* If we successfully added the candidate and the set is small enough,
6029          try optimizing it by removing other candidates.  */
6030       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6031         {
6032           iv_ca_delta_commit (data, ivs, act_delta, true);
6033           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6034           iv_ca_delta_commit (data, ivs, act_delta, false);
6035           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6036         }
6037
6038       if (compare_costs (acost, best_cost) < 0)
6039         {
6040           best_cost = acost;
6041           iv_ca_delta_free (&best_delta);
6042           best_delta = act_delta;
6043         }
6044       else
6045         iv_ca_delta_free (&act_delta);
6046     }
6047
6048   if (!best_delta)
6049     {
6050       /* Try removing the candidates from the set instead.  */
6051       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6052
6053       /* Nothing more we can do.  */
6054       if (!best_delta)
6055         return false;
6056     }
6057
6058   iv_ca_delta_commit (data, ivs, best_delta, true);
6059   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6060   iv_ca_delta_free (&best_delta);
6061   return true;
6062 }
6063
6064 /* Attempts to find the optimal set of induction variables.  We do simple
6065    greedy heuristic -- we try to replace at most one candidate in the selected
6066    solution and remove the unused ivs while this improves the cost.  */
6067
6068 static struct iv_ca *
6069 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6070 {
6071   struct iv_ca *set;
6072
6073   /* Get the initial solution.  */
6074   set = get_initial_solution (data, originalp);
6075   if (!set)
6076     {
6077       if (dump_file && (dump_flags & TDF_DETAILS))
6078         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6079       return NULL;
6080     }
6081
6082   if (dump_file && (dump_flags & TDF_DETAILS))
6083     {
6084       fprintf (dump_file, "Initial set of candidates:\n");
6085       iv_ca_dump (data, dump_file, set);
6086     }
6087
6088   while (try_improve_iv_set (data, set))
6089     {
6090       if (dump_file && (dump_flags & TDF_DETAILS))
6091         {
6092           fprintf (dump_file, "Improved to:\n");
6093           iv_ca_dump (data, dump_file, set);
6094         }
6095     }
6096
6097   return set;
6098 }
6099
6100 static struct iv_ca *
6101 find_optimal_iv_set (struct ivopts_data *data)
6102 {
6103   unsigned i;
6104   struct iv_ca *set, *origset;
6105   struct iv_use *use;
6106   comp_cost cost, origcost;
6107
6108   /* Determine the cost based on a strategy that starts with original IVs,
6109      and try again using a strategy that prefers candidates not based
6110      on any IVs.  */
6111   origset = find_optimal_iv_set_1 (data, true);
6112   set = find_optimal_iv_set_1 (data, false);
6113
6114   if (!origset && !set)
6115     return NULL;
6116
6117   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6118   cost = set ? iv_ca_cost (set) : infinite_cost;
6119
6120   if (dump_file && (dump_flags & TDF_DETAILS))
6121     {
6122       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6123                origcost.cost, origcost.complexity);
6124       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6125                cost.cost, cost.complexity);
6126     }
6127
6128   /* Choose the one with the best cost.  */
6129   if (compare_costs (origcost, cost) <= 0)
6130     {
6131       if (set)
6132         iv_ca_free (&set);
6133       set = origset;
6134     }
6135   else if (origset)
6136     iv_ca_free (&origset);
6137
6138   for (i = 0; i < n_iv_uses (data); i++)
6139     {
6140       use = iv_use (data, i);
6141       use->selected = iv_ca_cand_for_use (set, use)->cand;
6142     }
6143
6144   return set;
6145 }
6146
6147 /* Creates a new induction variable corresponding to CAND.  */
6148
6149 static void
6150 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6151 {
6152   gimple_stmt_iterator incr_pos;
6153   tree base;
6154   bool after = false;
6155
6156   if (!cand->iv)
6157     return;
6158
6159   switch (cand->pos)
6160     {
6161     case IP_NORMAL:
6162       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6163       break;
6164
6165     case IP_END:
6166       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6167       after = true;
6168       break;
6169
6170     case IP_AFTER_USE:
6171       after = true;
6172       /* fall through */
6173     case IP_BEFORE_USE:
6174       incr_pos = gsi_for_stmt (cand->incremented_at);
6175       break;
6176
6177     case IP_ORIGINAL:
6178       /* Mark that the iv is preserved.  */
6179       name_info (data, cand->var_before)->preserve_biv = true;
6180       name_info (data, cand->var_after)->preserve_biv = true;
6181
6182       /* Rewrite the increment so that it uses var_before directly.  */
6183       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6184       return;
6185     }
6186
6187   gimple_add_tmp_var (cand->var_before);
6188
6189   base = unshare_expr (cand->iv->base);
6190
6191   create_iv (base, unshare_expr (cand->iv->step),
6192              cand->var_before, data->current_loop,
6193              &incr_pos, after, &cand->var_before, &cand->var_after);
6194 }
6195
6196 /* Creates new induction variables described in SET.  */
6197
6198 static void
6199 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6200 {
6201   unsigned i;
6202   struct iv_cand *cand;
6203   bitmap_iterator bi;
6204
6205   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6206     {
6207       cand = iv_cand (data, i);
6208       create_new_iv (data, cand);
6209     }
6210
6211   if (dump_file && (dump_flags & TDF_DETAILS))
6212     {
6213       fprintf (dump_file, "\nSelected IV set: \n");
6214       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6215         {
6216           cand = iv_cand (data, i);
6217           dump_cand (dump_file, cand);
6218         }
6219       fprintf (dump_file, "\n");
6220     }
6221 }
6222
6223 /* Rewrites USE (definition of iv used in a nonlinear expression)
6224    using candidate CAND.  */
6225
6226 static void
6227 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6228                             struct iv_use *use, struct iv_cand *cand)
6229 {
6230   tree comp;
6231   tree op, tgt;
6232   gimple ass;
6233   gimple_stmt_iterator bsi;
6234
6235   /* An important special case -- if we are asked to express value of
6236      the original iv by itself, just exit; there is no need to
6237      introduce a new computation (that might also need casting the
6238      variable to unsigned and back).  */
6239   if (cand->pos == IP_ORIGINAL
6240       && cand->incremented_at == use->stmt)
6241     {
6242       enum tree_code stmt_code;
6243
6244       gcc_assert (is_gimple_assign (use->stmt));
6245       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6246
6247       /* Check whether we may leave the computation unchanged.
6248          This is the case only if it does not rely on other
6249          computations in the loop -- otherwise, the computation
6250          we rely upon may be removed in remove_unused_ivs,
6251          thus leading to ICE.  */
6252       stmt_code = gimple_assign_rhs_code (use->stmt);
6253       if (stmt_code == PLUS_EXPR
6254           || stmt_code == MINUS_EXPR
6255           || stmt_code == POINTER_PLUS_EXPR)
6256         {
6257           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6258             op = gimple_assign_rhs2 (use->stmt);
6259           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6260             op = gimple_assign_rhs1 (use->stmt);
6261           else
6262             op = NULL_TREE;
6263         }
6264       else
6265         op = NULL_TREE;
6266
6267       if (op && expr_invariant_in_loop_p (data->current_loop, op))
6268         return;
6269     }
6270
6271   comp = get_computation (data->current_loop, use, cand);
6272   gcc_assert (comp != NULL_TREE);
6273
6274   switch (gimple_code (use->stmt))
6275     {
6276     case GIMPLE_PHI:
6277       tgt = PHI_RESULT (use->stmt);
6278
6279       /* If we should keep the biv, do not replace it.  */
6280       if (name_info (data, tgt)->preserve_biv)
6281         return;
6282
6283       bsi = gsi_after_labels (gimple_bb (use->stmt));
6284       break;
6285
6286     case GIMPLE_ASSIGN:
6287       tgt = gimple_assign_lhs (use->stmt);
6288       bsi = gsi_for_stmt (use->stmt);
6289       break;
6290
6291     default:
6292       gcc_unreachable ();
6293     }
6294
6295   if (!valid_gimple_rhs_p (comp)
6296       || (gimple_code (use->stmt) != GIMPLE_PHI
6297           /* We can't allow re-allocating the stmt as it might be pointed
6298              to still.  */
6299           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6300               >= gimple_num_ops (gsi_stmt (bsi)))))
6301     {
6302       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6303                                        true, GSI_SAME_STMT);
6304       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6305         {
6306           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6307           /* As this isn't a plain copy we have to reset alignment
6308              information.  */
6309           if (SSA_NAME_PTR_INFO (comp))
6310             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6311         }
6312     }
6313
6314   if (gimple_code (use->stmt) == GIMPLE_PHI)
6315     {
6316       ass = gimple_build_assign (tgt, comp);
6317       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6318
6319       bsi = gsi_for_stmt (use->stmt);
6320       remove_phi_node (&bsi, false);
6321     }
6322   else
6323     {
6324       gimple_assign_set_rhs_from_tree (&bsi, comp);
6325       use->stmt = gsi_stmt (bsi);
6326     }
6327 }
6328
6329 /* Performs a peephole optimization to reorder the iv update statement with
6330    a mem ref to enable instruction combining in later phases. The mem ref uses
6331    the iv value before the update, so the reordering transformation requires
6332    adjustment of the offset. CAND is the selected IV_CAND.
6333
6334    Example:
6335
6336    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6337    iv2 = iv1 + 1;
6338
6339    if (t < val)      (1)
6340      goto L;
6341    goto Head;
6342
6343
6344    directly propagating t over to (1) will introduce overlapping live range
6345    thus increase register pressure. This peephole transform it into:
6346
6347
6348    iv2 = iv1 + 1;
6349    t = MEM_REF (base, iv2, 8, 8);
6350    if (t < val)
6351      goto L;
6352    goto Head;
6353 */
6354
6355 static void
6356 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6357 {
6358   tree var_after;
6359   gimple iv_update, stmt;
6360   basic_block bb;
6361   gimple_stmt_iterator gsi, gsi_iv;
6362
6363   if (cand->pos != IP_NORMAL)
6364     return;
6365
6366   var_after = cand->var_after;
6367   iv_update = SSA_NAME_DEF_STMT (var_after);
6368
6369   bb = gimple_bb (iv_update);
6370   gsi = gsi_last_nondebug_bb (bb);
6371   stmt = gsi_stmt (gsi);
6372
6373   /* Only handle conditional statement for now.  */
6374   if (gimple_code (stmt) != GIMPLE_COND)
6375     return;
6376
6377   gsi_prev_nondebug (&gsi);
6378   stmt = gsi_stmt (gsi);
6379   if (stmt != iv_update)
6380     return;
6381
6382   gsi_prev_nondebug (&gsi);
6383   if (gsi_end_p (gsi))
6384     return;
6385
6386   stmt = gsi_stmt (gsi);
6387   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6388     return;
6389
6390   if (stmt != use->stmt)
6391     return;
6392
6393   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6394     return;
6395
6396   if (dump_file && (dump_flags & TDF_DETAILS))
6397     {
6398       fprintf (dump_file, "Reordering \n");
6399       print_gimple_stmt (dump_file, iv_update, 0, 0);
6400       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6401       fprintf (dump_file, "\n");
6402     }
6403
6404   gsi = gsi_for_stmt (use->stmt);
6405   gsi_iv = gsi_for_stmt (iv_update);
6406   gsi_move_before (&gsi_iv, &gsi);
6407
6408   cand->pos = IP_BEFORE_USE;
6409   cand->incremented_at = use->stmt;
6410 }
6411
6412 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6413
6414 static void
6415 rewrite_use_address (struct ivopts_data *data,
6416                      struct iv_use *use, struct iv_cand *cand)
6417 {
6418   aff_tree aff;
6419   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6420   tree base_hint = NULL_TREE;
6421   tree ref, iv;
6422   bool ok;
6423
6424   adjust_iv_update_pos (cand, use);
6425   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6426   gcc_assert (ok);
6427   unshare_aff_combination (&aff);
6428
6429   /* To avoid undefined overflow problems, all IV candidates use unsigned
6430      integer types.  The drawback is that this makes it impossible for
6431      create_mem_ref to distinguish an IV that is based on a memory object
6432      from one that represents simply an offset.
6433
6434      To work around this problem, we pass a hint to create_mem_ref that
6435      indicates which variable (if any) in aff is an IV based on a memory
6436      object.  Note that we only consider the candidate.  If this is not
6437      based on an object, the base of the reference is in some subexpression
6438      of the use -- but these will use pointer types, so they are recognized
6439      by the create_mem_ref heuristics anyway.  */
6440   if (cand->iv->base_object)
6441     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6442
6443   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6444   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6445                         reference_alias_ptr_type (*use->op_p),
6446                         iv, base_hint, data->speed);
6447   copy_ref_info (ref, *use->op_p);
6448   *use->op_p = ref;
6449 }
6450
6451 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6452    candidate CAND.  */
6453
6454 static void
6455 rewrite_use_compare (struct ivopts_data *data,
6456                      struct iv_use *use, struct iv_cand *cand)
6457 {
6458   tree comp, *var_p, op, bound;
6459   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6460   enum tree_code compare;
6461   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6462   bool ok;
6463
6464   bound = cp->value;
6465   if (bound)
6466     {
6467       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6468       tree var_type = TREE_TYPE (var);
6469       gimple_seq stmts;
6470
6471       if (dump_file && (dump_flags & TDF_DETAILS))
6472         {
6473           fprintf (dump_file, "Replacing exit test: ");
6474           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6475         }
6476       compare = cp->comp;
6477       bound = unshare_expr (fold_convert (var_type, bound));
6478       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6479       if (stmts)
6480         gsi_insert_seq_on_edge_immediate (
6481                 loop_preheader_edge (data->current_loop),
6482                 stmts);
6483
6484       gimple_cond_set_lhs (use->stmt, var);
6485       gimple_cond_set_code (use->stmt, compare);
6486       gimple_cond_set_rhs (use->stmt, op);
6487       return;
6488     }
6489
6490   /* The induction variable elimination failed; just express the original
6491      giv.  */
6492   comp = get_computation (data->current_loop, use, cand);
6493   gcc_assert (comp != NULL_TREE);
6494
6495   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6496   gcc_assert (ok);
6497
6498   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6499                                      true, GSI_SAME_STMT);
6500 }
6501
6502 /* Rewrites USE using candidate CAND.  */
6503
6504 static void
6505 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6506 {
6507   switch (use->type)
6508     {
6509       case USE_NONLINEAR_EXPR:
6510         rewrite_use_nonlinear_expr (data, use, cand);
6511         break;
6512
6513       case USE_ADDRESS:
6514         rewrite_use_address (data, use, cand);
6515         break;
6516
6517       case USE_COMPARE:
6518         rewrite_use_compare (data, use, cand);
6519         break;
6520
6521       default:
6522         gcc_unreachable ();
6523     }
6524
6525   update_stmt (use->stmt);
6526 }
6527
6528 /* Rewrite the uses using the selected induction variables.  */
6529
6530 static void
6531 rewrite_uses (struct ivopts_data *data)
6532 {
6533   unsigned i;
6534   struct iv_cand *cand;
6535   struct iv_use *use;
6536
6537   for (i = 0; i < n_iv_uses (data); i++)
6538     {
6539       use = iv_use (data, i);
6540       cand = use->selected;
6541       gcc_assert (cand);
6542
6543       rewrite_use (data, use, cand);
6544     }
6545 }
6546
6547 /* Removes the ivs that are not used after rewriting.  */
6548
6549 static void
6550 remove_unused_ivs (struct ivopts_data *data)
6551 {
6552   unsigned j;
6553   bitmap_iterator bi;
6554   bitmap toremove = BITMAP_ALLOC (NULL);
6555
6556   /* Figure out an order in which to release SSA DEFs so that we don't
6557      release something that we'd have to propagate into a debug stmt
6558      afterwards.  */
6559   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6560     {
6561       struct version_info *info;
6562
6563       info = ver_info (data, j);
6564       if (info->iv
6565           && !integer_zerop (info->iv->step)
6566           && !info->inv_id
6567           && !info->iv->have_use_for
6568           && !info->preserve_biv)
6569         {
6570           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6571
6572           tree def = info->iv->ssa_name;
6573
6574           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6575             {
6576               imm_use_iterator imm_iter;
6577               use_operand_p use_p;
6578               gimple stmt;
6579               int count = 0;
6580
6581               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6582                 {
6583                   if (!gimple_debug_bind_p (stmt))
6584                     continue;
6585
6586                   /* We just want to determine whether to do nothing
6587                      (count == 0), to substitute the computed
6588                      expression into a single use of the SSA DEF by
6589                      itself (count == 1), or to use a debug temp
6590                      because the SSA DEF is used multiple times or as
6591                      part of a larger expression (count > 1). */
6592                   count++;
6593                   if (gimple_debug_bind_get_value (stmt) != def)
6594                     count++;
6595
6596                   if (count > 1)
6597                     BREAK_FROM_IMM_USE_STMT (imm_iter);
6598                 }
6599
6600               if (!count)
6601                 continue;
6602
6603               struct iv_use dummy_use;
6604               struct iv_cand *best_cand = NULL, *cand;
6605               unsigned i, best_pref = 0, cand_pref;
6606
6607               memset (&dummy_use, 0, sizeof (dummy_use));
6608               dummy_use.iv = info->iv;
6609               for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6610                 {
6611                   cand = iv_use (data, i)->selected;
6612                   if (cand == best_cand)
6613                     continue;
6614                   cand_pref = operand_equal_p (cand->iv->step,
6615                                                info->iv->step, 0)
6616                     ? 4 : 0;
6617                   cand_pref
6618                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
6619                     == TYPE_MODE (TREE_TYPE (info->iv->base))
6620                     ? 2 : 0;
6621                   cand_pref
6622                     += TREE_CODE (cand->iv->base) == INTEGER_CST
6623                     ? 1 : 0;
6624                   if (best_cand == NULL || best_pref < cand_pref)
6625                     {
6626                       best_cand = cand;
6627                       best_pref = cand_pref;
6628                     }
6629                 }
6630
6631               if (!best_cand)
6632                 continue;
6633
6634               tree comp = get_computation_at (data->current_loop,
6635                                               &dummy_use, best_cand,
6636                                               SSA_NAME_DEF_STMT (def));
6637               if (!comp)
6638                 continue;
6639
6640               if (count > 1)
6641                 {
6642                   tree vexpr = make_node (DEBUG_EXPR_DECL);
6643                   DECL_ARTIFICIAL (vexpr) = 1;
6644                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
6645                   if (SSA_NAME_VAR (def))
6646                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6647                   else
6648                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6649                   gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6650                   gimple_stmt_iterator gsi;
6651
6652                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6653                     gsi = gsi_after_labels (gimple_bb
6654                                             (SSA_NAME_DEF_STMT (def)));
6655                   else
6656                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6657
6658                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6659                   comp = vexpr;
6660                 }
6661
6662               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6663                 {
6664                   if (!gimple_debug_bind_p (stmt))
6665                     continue;
6666
6667                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6668                     SET_USE (use_p, comp);
6669
6670                   update_stmt (stmt);
6671                 }
6672             }
6673         }
6674     }
6675
6676   release_defs_bitset (toremove);
6677
6678   BITMAP_FREE (toremove);
6679 }
6680
6681 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6682    for pointer_map_traverse.  */
6683
6684 static bool
6685 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6686                       void *data ATTRIBUTE_UNUSED)
6687 {
6688   struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6689
6690   free (niter);
6691   return true;
6692 }
6693
6694 /* Frees data allocated by the optimization of a single loop.  */
6695
6696 static void
6697 free_loop_data (struct ivopts_data *data)
6698 {
6699   unsigned i, j;
6700   bitmap_iterator bi;
6701   tree obj;
6702
6703   if (data->niters)
6704     {
6705       pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6706       pointer_map_destroy (data->niters);
6707       data->niters = NULL;
6708     }
6709
6710   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6711     {
6712       struct version_info *info;
6713
6714       info = ver_info (data, i);
6715       free (info->iv);
6716       info->iv = NULL;
6717       info->has_nonlin_use = false;
6718       info->preserve_biv = false;
6719       info->inv_id = 0;
6720     }
6721   bitmap_clear (data->relevant);
6722   bitmap_clear (data->important_candidates);
6723
6724   for (i = 0; i < n_iv_uses (data); i++)
6725     {
6726       struct iv_use *use = iv_use (data, i);
6727
6728       free (use->iv);
6729       BITMAP_FREE (use->related_cands);
6730       for (j = 0; j < use->n_map_members; j++)
6731         if (use->cost_map[j].depends_on)
6732           BITMAP_FREE (use->cost_map[j].depends_on);
6733       free (use->cost_map);
6734       free (use);
6735     }
6736   data->iv_uses.truncate (0);
6737
6738   for (i = 0; i < n_iv_cands (data); i++)
6739     {
6740       struct iv_cand *cand = iv_cand (data, i);
6741
6742       free (cand->iv);
6743       if (cand->depends_on)
6744         BITMAP_FREE (cand->depends_on);
6745       free (cand);
6746     }
6747   data->iv_candidates.truncate (0);
6748
6749   if (data->version_info_size < num_ssa_names)
6750     {
6751       data->version_info_size = 2 * num_ssa_names;
6752       free (data->version_info);
6753       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6754     }
6755
6756   data->max_inv_id = 0;
6757
6758   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6759     SET_DECL_RTL (obj, NULL_RTX);
6760
6761   decl_rtl_to_reset.truncate (0);
6762
6763   data->inv_expr_tab.empty ();
6764   data->inv_expr_id = 0;
6765 }
6766
6767 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
6768    loop tree.  */
6769
6770 static void
6771 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6772 {
6773   free_loop_data (data);
6774   free (data->version_info);
6775   BITMAP_FREE (data->relevant);
6776   BITMAP_FREE (data->important_candidates);
6777
6778   decl_rtl_to_reset.release ();
6779   data->iv_uses.release ();
6780   data->iv_candidates.release ();
6781   data->inv_expr_tab.dispose ();
6782 }
6783
6784 /* Returns true if the loop body BODY includes any function calls.  */
6785
6786 static bool
6787 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6788 {
6789   gimple_stmt_iterator gsi;
6790   unsigned i;
6791
6792   for (i = 0; i < num_nodes; i++)
6793     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6794       {
6795         gimple stmt = gsi_stmt (gsi);
6796         if (is_gimple_call (stmt)
6797             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6798           return true;
6799       }
6800   return false;
6801 }
6802
6803 /* Optimizes the LOOP.  Returns true if anything changed.  */
6804
6805 static bool
6806 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6807 {
6808   bool changed = false;
6809   struct iv_ca *iv_ca;
6810   edge exit = single_dom_exit (loop);
6811   basic_block *body;
6812
6813   gcc_assert (!data->niters);
6814   data->current_loop = loop;
6815   data->speed = optimize_loop_for_speed_p (loop);
6816
6817   if (dump_file && (dump_flags & TDF_DETAILS))
6818     {
6819       fprintf (dump_file, "Processing loop %d\n", loop->num);
6820
6821       if (exit)
6822         {
6823           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
6824                    exit->src->index, exit->dest->index);
6825           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6826           fprintf (dump_file, "\n");
6827         }
6828
6829       fprintf (dump_file, "\n");
6830     }
6831
6832   body = get_loop_body (loop);
6833   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6834   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6835   free (body);
6836
6837   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6838
6839   /* For each ssa name determines whether it behaves as an induction variable
6840      in some loop.  */
6841   if (!find_induction_variables (data))
6842     goto finish;
6843
6844   /* Finds interesting uses (item 1).  */
6845   find_interesting_uses (data);
6846   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6847     goto finish;
6848
6849   /* Finds candidates for the induction variables (item 2).  */
6850   find_iv_candidates (data);
6851
6852   /* Calculates the costs (item 3, part 1).  */
6853   determine_iv_costs (data);
6854   determine_use_iv_costs (data);
6855   determine_set_costs (data);
6856
6857   /* Find the optimal set of induction variables (item 3, part 2).  */
6858   iv_ca = find_optimal_iv_set (data);
6859   if (!iv_ca)
6860     goto finish;
6861   changed = true;
6862
6863   /* Create the new induction variables (item 4, part 1).  */
6864   create_new_ivs (data, iv_ca);
6865   iv_ca_free (&iv_ca);
6866
6867   /* Rewrite the uses (item 4, part 2).  */
6868   rewrite_uses (data);
6869
6870   /* Remove the ivs that are unused after rewriting.  */
6871   remove_unused_ivs (data);
6872
6873   /* We have changed the structure of induction variables; it might happen
6874      that definitions in the scev database refer to some of them that were
6875      eliminated.  */
6876   scev_reset ();
6877
6878 finish:
6879   free_loop_data (data);
6880
6881   return changed;
6882 }
6883
6884 /* Main entry point.  Optimizes induction variables in loops.  */
6885
6886 void
6887 tree_ssa_iv_optimize (void)
6888 {
6889   struct loop *loop;
6890   struct ivopts_data data;
6891
6892   tree_ssa_iv_optimize_init (&data);
6893
6894   /* Optimize the loops starting with the innermost ones.  */
6895   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
6896     {
6897       if (dump_file && (dump_flags & TDF_DETAILS))
6898         flow_loop_dump (loop, dump_file, NULL, 1);
6899
6900       tree_ssa_iv_optimize_loop (&data, loop);
6901     }
6902
6903   tree_ssa_iv_optimize_finalize (&data);
6904 }