/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variables value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
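
/* As a motivating example (a hand-written sketch, not actual output of
   this pass), strength reduction rewrites

     for (i = 0; i < n; i++)
       a[i] = 0;

   so that the per-iteration address computation &a + 4 * i (assuming
   4-byte elements) is replaced by a pointer candidate that is simply
   incremented:

     for (p = &a[0]; p < &a[n]; p++)
       *p = 0;

   after which the original variable i can be eliminated if it has no
   other uses.  */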
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
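
/* For instance (an illustrative sketch, not actual target numbers): on a
   target where a reg+reg address is as fast as a plain reg address but
   occupies an extra register, expressing a use as [base + iv] might be
   assigned {cost = 0, complexity = 1}, while first computing base + iv
   into a register might get {cost = 1, complexity = 0}; ties on COST are
   broken in favor of the lower COMPLEXITY.  */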
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
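
/* As an illustration (a hand-written sketch, not pass output): for

     for (i = 0; i < n; i++)
       body;

   an IP_NORMAL candidate is incremented in the loop body just before the
   exit test (i' = i + step; if (i' < n) goto top;), so the new value is
   already available to the test, whereas an IP_END candidate is
   incremented on the latch, after the exit test has used the old value.  */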
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table <iv_inv_expr_hasher> inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab.create (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
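
/* For instance (illustrative only): applied to &a[i] this yields &a,
   applied to p + 4 it yields whatever the base object of p is, and
   applied to an integer constant it yields NULL_TREE, meaning the value
   is known not to point into any object.  */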
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree base_object = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower all address expressions except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (base_object);
  if (TREE_CODE (base_object) == ADDR_EXPR
      && !DECL_P (TREE_OPERAND (base_object, 0)))
    {
      aff_tree comb;
      double_int size;
      base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
					     &comb, &size);
      gcc_assert (base_object != NULL_TREE);
      base_object = build_fold_addr_expr (base_object);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base_object);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
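
/* For example (an illustrative GIMPLE sketch), given the loop-header phi

     i_1 = PHI <0(preheader), i_2(latch)>

   with i_2 = i_1 + 4 in the loop body, determine_biv_step returns 4;
   for a phi whose value does not advance by a loop-invariant step it
   returns NULL_TREE.  */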
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (bound)
    *bound = op1;
  if (iv_var)
    *iv_var = iv0;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
	return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}
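
/* For example (illustrative): constant_multiple_of (4 * i, i, &mul)
   sets MUL to 4; constant_multiple_of (i + j, i, &mul) fails because j
   is not a constant multiple of i; constant_multiple_of (8, 2, &mul)
   sets MUL to 4 since the division is exact.  */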
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      {
	tree field;

	if (!inside_addr)
	  return orig_expr;

	tmp = component_ref_field_offset (expr);
	field = TREE_OPERAND (expr, 1);
	if (top_compref
	    && cst_and_fits_in_hwi (tmp)
	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
	  {
	    HOST_WIDE_INT boffset, abs_off;

	    /* Strip the component reference completely.  */
	    op0 = TREE_OPERAND (expr, 0);
	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
	    if (boffset < 0)
	      abs_off = -abs_off;

	    *offset = off0 + int_cst_value (tmp) + abs_off;
	    return op0;
	  }
      }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  HOST_WIDE_INT off;
  tree core = strip_offset_1 (expr, false, false, &off);
  *offset = off;
  return core;
}
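
/* For instance (illustrative): strip_offset (&a[4], &off) returns &a[0]
   with OFF set to 4 times the element size, so that a candidate based on
   the stripped core can serve several uses that differ only in their
   constant offset.  */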
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
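
/* E.g. (illustrative): for "int" this returns "unsigned int", for
   "unsigned int" the type itself, and for a pointer type an unsigned
   integer type of the same precision, so that candidate arithmetic never
   invokes undefined signed overflow.  */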
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to which we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      data->iv_candidates.safe_push (cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
			bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  enum machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if (((USE_LOAD_PRE_INCREMENT (mem_mode)
	|| USE_STORE_PRE_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_PRE_DECREMENT (mem_mode)
	   || USE_STORE_PRE_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
	{
	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  code = POINTER_PLUS_EXPR;
	}
      else
	new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
		       use->stmt);
    }
  if (((USE_LOAD_POST_INCREMENT (mem_mode)
	|| USE_STORE_POST_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_POST_DECREMENT (mem_mode)
	   || USE_STORE_POST_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
		       use->stmt);
    }
}
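
/* For example (illustrative and target-dependent): on a target with
   post-increment addressing, a load from *p followed by p = p + 4, where
   4 equals the GET_MODE_SIZE of the access, can be combined into a single
   "load *p++" by giving the use an IP_AFTER_USE candidate.  */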
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}
/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_candidate (data, integer_zero_node, integer_one_node, true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
        (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
      && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_integer_type_node, 0),
                   build_int_cst (long_integer_type_node, 1), true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
        (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
      && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
                   build_int_cst (long_long_integer_type_node, 1), true, NULL);
}
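/* For example (illustrative; the exact set depends on the target's type
   precisions and BITS_PER_WORD): on an LP64 target with 32-bit int and
   64-bit long, the calls above register the candidates {base 0, step 1} in
   both int and long, and the "long long" candidate is skipped because its
   precision equals that of long.  */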
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
                   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
                              iv->base, iv->step, true, IP_ORIGINAL, NULL,
                              SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
        add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (data->consider_all_candidates)
        size = n_iv_cands (data);
      else
        {
          s = bitmap_count_bits (use->related_cands);

          /* Round up to a power of two, so that the modulus can be
             computed with a single bitwise AND.  */
          size = s ? (1 << ceil_log2 (s)) : 1;
        }

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
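/* A minimal sketch of the hashing scheme used by the cost map (illustration
   only; the real lookup code is in set_use_iv_cost/get_use_iv_cost below).
   Because SIZE is a power of two, "id % size" reduces to a single AND:

     static unsigned
     cost_map_slot (unsigned id, unsigned size)
     {
       return id & (size - 1);
     }

   E.g. with s == 5 related candidates, size is rounded up to 8, and a
   candidate with id 13 hashes to slot 13 & 7 == 5 == 13 % 8.  */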
/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}

/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}

/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}
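/* Example of the resulting ordering (illustrative values): {cost 4,
   complexity 2} compares less than {cost 5, complexity 0} because the
   runtime cost is compared first, while {cost 4, complexity 1} compares
   less than {cost 4, complexity 2} because complexity only breaks ties.  */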
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE, and in case of iv elimination the comparison operator is COMP.  */

static void
set_use_iv_cost (struct ivopts_data *data,
                 struct iv_use *use, struct iv_cand *cand,
                 comp_cost cost, bitmap depends_on, tree value,
                 enum tree_code comp, int inv_expr_id)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].comp = comp;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].comp = comp;
  use->cost_map[i].inv_expr_id = inv_expr_id;
}
/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
                 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
        return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;
    else if (use->cost_map[i].cand == NULL)
      return NULL;
  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;
    else if (use->cost_map[i].cand == NULL)
      return NULL;

  return NULL;
}
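/* Illustration of the probing scheme above (hypothetical values): with
   n_map_members == 8 and cand->id == 13, the search starts at slot
   13 & 7 == 5, walks slots 5, 6, 7, then wraps around to 0 .. 4, and can
   stop at the first empty slot because set_use_iv_cost never leaves a gap
   in front of an inserted entry.  */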
/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq, bool speed)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
        cost += set_src_cost (SET_SRC (set), speed);
      else
        cost++;
    }

  return cost;
}
/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
           handled_component_p (*expr_p);
           expr_p = &TREE_OPERAND (*expr_p, 0))
        continue;
      obj = *expr_p;
      if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      /* Defer handling of anonymous SSA_NAMEs to the expander.  */
      if (!obj)
        return NULL_TREE;
      if (!DECL_RTL_SET_P (obj))
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
        break;

      if (DECL_MODE (obj) == BLKmode)
        x = produce_memory_decl_rtl (obj, regno);
      else
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      decl_rtl_to_reset.safe_push (obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_get_node (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
                          TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, speed);

  return cost;
}
/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
        return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
        return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, gimple at,
                     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
        cstep_common = fold_convert (common_type, cstep);
      else
        cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, -rat);
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
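/* A worked instance of the rewrite above (illustrative numbers): for
   use->iv = {base 0, step 4} and cand->iv = {base 8, step 4} we get
   ratio == 1, so

     use = ubase - ratio * cbase + ratio * var = 0 - 8 + var = var - 8,

   i.e. the use is recomputed from the candidate with a single
   subtraction of a loop-invariant constant.  */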
/* Return the type of USE.  */

static tree
get_use_type (struct iv_use *use)
{
  tree base_type = TREE_TYPE (use->iv->base);
  tree type;

  if (use->type == USE_ADDRESS)
    {
      /* The base_type may be a void pointer.  Create a pointer type based on
         the mem_ref instead.  */
      type = build_pointer_type (TREE_TYPE (*use->op_p));
      gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
                  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
    }
  else
    type = base_type;

  return type;
}
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
                    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = get_use_type (use);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}

/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.  */

static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    return cost / avg_loop_niter (data->current_loop);
  else
    return cost;
}
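/* For instance (illustrative numbers): a setup cost of 10 in a loop with
   avg_loop_niter == 5 is charged as 10 / 5 == 2 per iteration when
   optimizing for speed, but stays 10 when optimizing for size, since the
   setup code is emitted exactly once either way.  */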
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

static bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
                                 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static vec<sbitmap> valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= valid_mult_list.length ())
    valid_mult_list.safe_grow_cleared (data_index + 1);

  valid_mult = valid_mult_list[data_index];
  if (!valid_mult)
    {
      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
      rtx addr, scaled;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      bitmap_clear (valid_mult);
      scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
        {
          XEXP (scaled, 1) = gen_int_mode (i, address_mode);
          if (memory_address_addr_space_p (mode, addr, as)
              || memory_address_addr_space_p (mode, scaled, as))
            bitmap_set_bit (valid_mult, i + MAX_RATIO);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "  allowed multipliers:");
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
            if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
              fprintf (dump_file, " %d", (int) i);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }

      valid_mult_list[data_index] = valid_mult;
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
}
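/* As an example (target-specific, so only an illustration): on x86-64 the
   probe above typically accepts the scales 1, 2, 4 and 8, matching the
   base + index * scale addressing modes, so a call like

     multiplier_allowed_in_address_p (4, SImode, ADDR_SPACE_GENERIC)

   would return true, while a ratio of 3 would be rejected and the scaled
   index would need an explicit multiply outside the address.  */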
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

typedef struct address_cost_data_s
{
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
} *address_cost_data;
static comp_cost
get_address_cost (bool symbol_present, bool var_present,
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
                  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
                  addr_space_t as, bool speed,
                  bool stmt_after_inc, bool *may_autoinc)
{
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  static vec<address_cost_data> address_cost_data_list;
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
  address_cost_data data;
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (data_index >= address_cost_data_list.length ())
    address_cost_data_list.safe_grow_cleared (data_index + 1);

  data = address_cost_data_list[data_index];
  if (!data)
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT rat, off = 0;
      int old_cse_not_expected, width;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;
      rtx reg0, reg1;

      data = (address_cost_data) xcalloc (1, sizeof (*data));

      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);

      width = GET_MODE_BITSIZE (address_mode) - 1;
      if (width > (HOST_BITS_PER_WIDE_INT - 1))
        width = HOST_BITS_PER_WIDE_INT - 1;
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);

      for (i = width; i >= 0; i--)
        {
          off = -((unsigned HOST_WIDE_INT) 1 << i);
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      data->min_offset = (i == -1? 0 : off);

      for (i = width; i >= 0; i--)
        {
          off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      if (i == -1)
        off = 0;
      data->max_offset = off;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "get_address_cost:\n");
          fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->min_offset);
          fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->max_offset);
        }

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
        if (multiplier_allowed_in_address_p (i, mem_mode, as))
          {
            rat = i;
            break;
          }

      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      if (USE_LOAD_PRE_DECREMENT (mem_mode)
          || USE_STORE_PRE_DECREMENT (mem_mode))
        {
          addr = gen_rtx_PRE_DEC (address_mode, reg0);
          has_predec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (USE_LOAD_POST_DECREMENT (mem_mode)
          || USE_STORE_POST_DECREMENT (mem_mode))
        {
          addr = gen_rtx_POST_DEC (address_mode, reg0);
          has_postdec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (USE_LOAD_PRE_INCREMENT (mem_mode)
          || USE_STORE_PRE_INCREMENT (mem_mode))
        {
          addr = gen_rtx_PRE_INC (address_mode, reg0);
          has_preinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (USE_LOAD_POST_INCREMENT (mem_mode)
          || USE_STORE_POST_INCREMENT (mem_mode))
        {
          addr = gen_rtx_POST_INC (address_mode, reg0);
          has_postinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      for (i = 0; i < 16; i++)
        {
          sym_p = i & 1;
          var_p = (i >> 1) & 1;
          off_p = (i >> 2) & 1;
          rat_p = (i >> 3) & 1;

          addr = reg0;
          if (rat_p)
            addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
                                   gen_int_mode (rat, address_mode));

          if (var_p)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);

          if (sym_p)
            {
              base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
              /* ??? We can run into trouble with some backends by presenting
                 it with symbols which haven't been properly passed through
                 targetm.encode_section_info.  By setting the local bit, we
                 enhance the probability of things working.  */
              SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

              if (off_p)
                base = gen_rtx_fmt_e (CONST, address_mode,
                                      gen_rtx_fmt_ee
                                        (PLUS, address_mode, base,
                                         gen_int_mode (off, address_mode)));
            }
          else if (off_p)
            base = gen_int_mode (off, address_mode);
          else
            base = NULL_RTX;

          if (base)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);

          start_sequence ();
          /* To avoid splitting addressing modes, pretend that no cse will
             follow.  */
          old_cse_not_expected = cse_not_expected;
          cse_not_expected = true;
          addr = memory_address_addr_space (mem_mode, addr, as);
          cse_not_expected = old_cse_not_expected;
          seq = get_insns ();
          end_sequence ();

          acost = seq_cost (seq, speed);
          acost += address_cost (addr, mem_mode, as, speed);

          if (!acost)
            acost = 1;
          data->costs[sym_p][var_p][off_p][rat_p] = acost;
        }

      /* On some targets, it is quite expensive to load symbol to a register,
         which makes addresses that contain symbols look much more expensive.
         However, the symbol will have to be loaded in any case before the
         loop (and quite likely we have it in register already), so it does not
         make much sense to penalize them too heavily.  So make some final
         tweaks for the SYMBOL_PRESENT modes:

         If VAR_PRESENT is false, and the mode obtained by changing symbol to
         var is cheaper, use this mode with small penalty.

         If VAR_PRESENT is true, try whether the mode with
         SYMBOL_PRESENT = false is cheaper even with cost of addition, and
         if this is the case, use it.  */
      add_c = add_cost (speed, address_mode);
      for (i = 0; i < 8; i++)
        {
          var_p = i & 1;
          off_p = (i >> 1) & 1;
          rat_p = (i >> 2) & 1;

          acost = data->costs[0][1][off_p][rat_p] + 1;
          if (var_p)
            acost += add_c;

          if (acost < data->costs[1][var_p][off_p][rat_p])
            data->costs[1][var_p][off_p][rat_p] = acost;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Address costs:\n");

          for (i = 0; i < 16; i++)
            {
              sym_p = i & 1;
              var_p = (i >> 1) & 1;
              off_p = (i >> 2) & 1;
              rat_p = (i >> 3) & 1;

              fprintf (dump_file, "  ");
              if (sym_p)
                fprintf (dump_file, "sym + ");
              if (var_p)
                fprintf (dump_file, "var + ");
              if (off_p)
                fprintf (dump_file, "cst + ");
              if (rat_p)
                fprintf (dump_file, "rat * ");

              acost = data->costs[sym_p][var_p][off_p][rat_p];
              fprintf (dump_file, "index costs %d\n", acost);
            }

          if (has_predec[mem_mode] || has_postdec[mem_mode]
              || has_preinc[mem_mode] || has_postinc[mem_mode])
            fprintf (dump_file, "  May include autoinc/dec\n");
          fprintf (dump_file, "\n");
        }

      address_cost_data_list[data_index] = data;
    }

  bits = GET_MODE_BITSIZE (address_mode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  autoinc = false;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else if ((has_postinc[mem_mode] && autoinc_offset == 0
            && msize == cstep)
           || (has_postdec[mem_mode] && autoinc_offset == 0
               && msize == -cstep)
           || (has_preinc[mem_mode] && autoinc_offset == msize
               && msize == cstep)
           || (has_predec[mem_mode] && autoinc_offset == -msize
               && msize == -cstep))
    autoinc = true;

  cost = 0;
  offset_p = (s_offset != 0
              && data->min_offset <= s_offset
              && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
             && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  if (ratio != 1 && !ratio_p)
    cost += mult_by_coeff_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (speed, address_mode);

  if (may_autoinc)
    *may_autoinc = autoinc;
  acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
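/* The 16 address-shape combinations above are decoded from the loop index I
   as illustrated below (I == 13 == 0b1101 shown; illustration only):

     sym_p = 13 & 1        == 1   symbol present
     var_p = (13 >> 1) & 1 == 0   no second index register
     off_p = (13 >> 2) & 1 == 1   constant offset present
     rat_p = (13 >> 3) & 1 == 1   scaled index

   i.e. that iteration measures the cost of a "sym + cst + rat * index"
   address.  */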
/* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
   EXPR operand holding the shift.  COST0 and COST1 are the costs for
   calculating the operands of EXPR.  Returns true if successful, and returns
   the cost in COST.  */

static bool
get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
                   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
{
  comp_cost res;
  tree op1 = TREE_OPERAND (expr, 1);
  tree cst = TREE_OPERAND (mult, 1);
  tree multop = TREE_OPERAND (mult, 0);
  int m = exact_log2 (int_cst_value (cst));
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int sa_cost;
  bool equal_p = false;

  if (!(m >= 0 && m < maxm))
    return false;

  if (operand_equal_p (op1, mult, 0))
    equal_p = true;

  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
             ? shiftadd_cost (speed, mode, m)
             : (equal_p
                ? shiftsub1_cost (speed, mode, m)
                : shiftsub0_cost (speed, mode, m)));
  res = new_cost (sa_cost, 0);
  res = add_costs (res, equal_p ? cost0 : cost1);

  STRIP_NOPS (multop);
  if (!is_gimple_val (multop))
    res = add_costs (res, force_expr_to_var_cost (multop, speed));

  *cost = res;
  return true;
}
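/* Example (illustrative): for EXPR = a + b * 8, MULT = b * 8 gives
   m = exact_log2 (8) == 3, so the expression can be costed as a single
   shift-and-add "(b << 3) + a" via shiftadd_cost instead of a full
   multiply followed by an add.  */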
/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
        {
          integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
                                                             2000), i) + 1;

          symbol_cost[i] = computation_cost (addr, i) + 1;

          address_cost[i]
            = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
              fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
              fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
              fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
              fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
              fprintf (dump_file, "\n");
            }
        }

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return no_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
        return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
        {
          tree obj = TREE_OPERAND (expr, 0);

          if (TREE_CODE (obj) == VAR_DECL
              || TREE_CODE (obj) == PARM_DECL
              || TREE_CODE (obj) == RESULT_DECL)
            return new_cost (symbol_cost [speed], 0);
        }

      return new_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);
      break;

    CASE_CONVERT:
    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost[speed], 0);
    }

  if (op0 == NULL_TREE
      || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
    cost0 = no_cost;
  else
    cost0 = force_expr_to_var_cost (op0, speed);

  if (op1 == NULL_TREE
      || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
    cost1 = no_cost;
  else
    cost1 = force_expr_to_var_cost (op1, speed);

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_cost (speed, mode), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
        {
          tree mult = NULL_TREE;
          comp_cost sa_cost;
          if (TREE_CODE (op1) == MULT_EXPR)
            mult = op1;
          else if (TREE_CODE (op0) == MULT_EXPR)
            mult = op0;

          if (mult != NULL_TREE
              && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
              && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
                                    speed, &sa_cost))
            return sa_cost;
        }
      break;

    CASE_CONVERT:
      {
        tree inner_mode, outer_mode;
        outer_mode = TREE_TYPE (expr);
        inner_mode = TREE_TYPE (op0);
        cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
                                       TYPE_MODE (inner_mode), speed), 0);
      }
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
        cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
                                             mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
        cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
                                             mode, speed), 0);
      else
        return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
                tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
                    tree addr, bool *symbol_present, bool *var_present,
                    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return no_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return no_cost;
}
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
                     tree e1, tree e2, bool *symbol_present, bool *var_present,
                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return no_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
                               symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
                 tree e1, tree e2, bool *symbol_present, bool *var_present,
                 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
                                offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return no_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
      return cost;
    }

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Returns true if AFF1 and AFF2 are identical.  */

static bool
compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
{
  unsigned i;

  if (aff1->n != aff2->n)
    return false;

  for (i = 0; i < aff1->n; i++)
    {
      if (aff1->elts[i].coef != aff2->elts[i].coef)
        return false;

      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
        return false;
    }
  return true;
}
/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */

static int
get_expr_id (struct ivopts_data *data, tree expr)
{
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  ent.expr = expr;
  ent.hash = iterative_hash_expr (expr, 0);
  slot = data->inv_expr_tab.find_slot (&ent, INSERT);
  if (*slot)
    return (*slot)->id;

  *slot = XNEW (struct iv_inv_expr_ent);
  (*slot)->expr = expr;
  (*slot)->hash = ent.hash;
  (*slot)->id = data->inv_expr_id++;
  return (*slot)->id;
}
/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */

static int
get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
                            tree cbase, HOST_WIDE_INT ratio,
                            bool address_p)
{
  aff_tree ubase_aff, cbase_aff;
  tree expr, ub, cb;

  STRIP_NOPS (ubase);
  STRIP_NOPS (cbase);
  ub = ubase;
  cb = cbase;

  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))
    return -1;

  /* Strips the constant part.  */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
        ubase = TREE_OPERAND (ubase, 0);
    }

  /* Strips the constant part.  */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
        cbase = TREE_OPERAND (cbase, 0);
    }

  if (address_p)
    {
      if (((TREE_CODE (ubase) == SSA_NAME)
           || (TREE_CODE (ubase) == ADDR_EXPR
               && is_gimple_min_invariant (ubase)))
          && (TREE_CODE (cbase) == INTEGER_CST))
        return -1;

      if (((TREE_CODE (cbase) == SSA_NAME)
           || (TREE_CODE (cbase) == ADDR_EXPR
               && is_gimple_min_invariant (cbase)))
          && (TREE_CODE (ubase) == INTEGER_CST))
        return -1;
    }

  if (ratio == 1)
    {
      if (operand_equal_p (ubase, cbase, 0))
        return -1;

      if (TREE_CODE (ubase) == ADDR_EXPR
          && TREE_CODE (cbase) == ADDR_EXPR)
        {
          tree usym, csym;

          usym = TREE_OPERAND (ubase, 0);
          csym = TREE_OPERAND (cbase, 0);
          if (TREE_CODE (usym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (usym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && tree_fits_shwi_p (ind)
                  && TREE_INT_CST_LOW (ind) == 0)
                usym = TREE_OPERAND (usym, 0);
            }
          if (TREE_CODE (csym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (csym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && tree_fits_shwi_p (ind)
                  && TREE_INT_CST_LOW (ind) == 0)
                csym = TREE_OPERAND (csym, 0);
            }
          if (operand_equal_p (usym, csym, 0))
            return -1;
        }
      /* Now do more complex comparison.  */
      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
      if (compare_aff_trees (&ubase_aff, &cbase_aff))
        return -1;
    }

  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  return get_expr_id (data, expr);
}
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
                         struct iv_use *use, struct iv_cand *cand,
                         bool address_p, bitmap *depends_on, gimple at,
                         bool *can_autoinc,
                         int *inv_expr_id)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  double_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));
  enum machine_mode mem_mode = (address_p
                                ? TYPE_MODE (TREE_TYPE (*use->op_p))
                                : VOIDmode);

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p
      || (use->iv->base_object
          && cand->iv->base_object
          && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
          && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
    {
      /* Do not try to express address of an object with computation based
         on address of a different object.  This may cause problems in rtl
         level alias analysis (that does not expect this to be happening,
         as this is illegal in C), and would be unlikely to be useful
         anyway.  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (rat.fits_shwi ())
    ratio = rat.to_shwi ();
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost = difference_cost (data,
                              ubase, build_int_cst (utype, 0),
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (ratio == 1)
    {
      tree real_cbase = cbase;

      /* Check to see if any adjustment is needed.  */
      if (cstepi == 0 && stmt_is_after_inc)
        {
          aff_tree real_cbase_aff;
          aff_tree cstep_aff;

          tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
                                   &real_cbase_aff);
          tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);

          aff_combination_add (&real_cbase_aff, &cstep_aff);
          real_cbase = aff_combination_to_tree (&real_cbase_aff);
        }

      cost = difference_cost (data,
                              ubase, real_cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (address_p
           && !POINTER_TYPE_P (ctype)
           && multiplier_allowed_in_address_p
                (ratio, mem_mode,
                 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
    {
      cbase
        = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
      cost = difference_cost (data,
                              ubase, cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost = add_costs (cost,
                        difference_cost (data,
                                         ubase, build_int_cst (utype, 0),
                                         &symbol_present, &var_present,
                                         &offset, depends_on));
      cost.cost /= avg_loop_niter (data->current_loop);
      cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
    }

  if (inv_expr_id)
    {
      *inv_expr_id =
          get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
      /* Clear depends on.  */
      if (*inv_expr_id != -1 && depends_on && *depends_on)
        bitmap_clear (*depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost,
                      get_address_cost (symbol_present, var_present,
                                        offset, ratio, cstepi,
                                        mem_mode,
                                        TYPE_ADDR_SPACE (TREE_TYPE (utype)),
                                        speed, stmt_is_after_inc,
                                        can_autoinc));

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
        cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
      return cost;
    }

  /* Symbol + offset should be compile-time computable so consider that they
     are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost.cost += adjust_setup_cost (data,
                                    add_cost (speed, TYPE_MODE (ctype)));

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += add_cost (speed, TYPE_MODE (ctype));

  aratio = ratio > 0 ? ratio : -ratio;
  if (aratio != 1)
    cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
  return cost;

fallback:
  if (can_autoinc)
    *can_autoinc = false;

  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build_simple_mem_ref (comp);

    return new_cost (computation_cost (comp, speed), 0);
  }
}
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand,
                      bool address_p, bitmap *depends_on,
                      bool *can_autoinc, int *inv_expr_id)
{
  return get_computation_cost_at (data,
                                  use, cand, address_p, depends_on, use->stmt,
                                  can_autoinc, inv_expr_id);
}
/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;
  int inv_expr_id = -1;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
                       ERROR_MARK, -1);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on,
                               NULL, &inv_expr_id);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  int inv_expr_id = -1;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
                                         &can_autoinc, &inv_expr_id);

  if (cand->ainc_use == use)
    {
      if (can_autoinc)
        cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
         opportunities, and it turns out it can't be used, set the cost to
         infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
        cost = infinite_cost;
    }
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
               aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;

  tree_to_aff_combination (iv->step, steptype, &step);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  aff_combination_add (val, &delta);
}
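/* In other words, VAL = base + (niter + after) * step, where "after" is 1
   when AT follows the biv increment and 0 otherwise.  E.g. (illustrative
   numbers) for base 16, step 4 and niter 10, the value is 56 before the
   increment and 60 after it.  */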
/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range)/step - 1,
     i.e., N*type_range/step - 1.  Since type range is power
     of two, N == (step >> num_of_ending_zeros_binary (step),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1

   */
  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_to_uhwi (pow2div)));

  return period;
}
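/* Worked example (illustrative): for an 8-bit unsigned iv with step 4,
   num_ending_zeros (4) == 2, so the period is a mask of 8 - 2 == 6 low
   bits, i.e. 63: after 63 increments the iv has advanced by 63 * 4 == 252
   and the next increment wraps back to the initial value, so the iv takes
   distinct values for exactly period + 1 consecutive iterations.  */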
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}

/* Strips type conversions from EXP that do not change whether the type
   wraps on overflow, so that the overflow reasoning below remains valid
   for the stripped expression.  */

static tree
strip_wrap_conserving_type_conversions (tree exp)
{
  while (tree_ssa_useless_type_conversion (exp)
         && (nowrap_type_p (TREE_TYPE (exp))
             == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
    exp = TREE_OPERAND (exp, 0);
  return exp;
}
/* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
   check for an exact match.  */

static bool
expr_equal_p (tree e, tree what)
{
  gimple stmt;
  enum tree_code code;

  e = strip_wrap_conserving_type_conversions (e);
  what = strip_wrap_conserving_type_conversions (what);

  code = TREE_CODE (what);
  if (TREE_TYPE (e) != TREE_TYPE (what))
    return false;

  if (operand_equal_p (e, what, 0))
    return true;

  if (TREE_CODE (e) != SSA_NAME)
    return false;

  stmt = SSA_NAME_DEF_STMT (e);
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      || gimple_assign_rhs_code (stmt) != code)
    return false;

  switch (get_gimple_rhs_class (code))
    {
    case GIMPLE_BINARY_RHS:
      if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
        return false;
      /* Fallthru.  */

    case GIMPLE_UNARY_RHS:
    case GIMPLE_SINGLE_RHS:
      return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
    default:
      return false;
    }
}
/* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
   we only detect the situation that BASE = SOMETHING + OFFSET, where the
   calculation is performed in non-wrapping type.

   TODO: More generally, we could test for the situation that
         BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
         This would require knowing the sign of OFFSET.

         Also, we only look for the first addition in the computation of BASE.
         More complex analysis would be better, but introducing it just for
         this optimization seems like an overkill.  */

static bool
difference_cannot_overflow_p (tree base, tree offset)
{
  enum tree_code code;
  tree e1, e2;

  if (!nowrap_type_p (TREE_TYPE (base)))
    return false;

  base = expand_simple_operations (base);

  if (TREE_CODE (base) == SSA_NAME)
    {
      gimple stmt = SSA_NAME_DEF_STMT (base);

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
        return false;

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
        return false;

      e1 = gimple_assign_rhs1 (stmt);
      e2 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      code = TREE_CODE (base);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
        return false;
      e1 = TREE_OPERAND (base, 0);
      e2 = TREE_OPERAND (base, 1);
    }

  /* TODO: deeper inspection may be necessary to prove the equality.  */
  switch (code)
    {
    case PLUS_EXPR:
      return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
    case POINTER_PLUS_EXPR:
      return expr_equal_p (e2, offset);

    default:
      return false;
    }
}
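/* For example (illustrative GIMPLE): if BASE is p_0 defined as

     p_0 = base + a;     <-- POINTER_PLUS_EXPR in a non-wrapping type

   then difference_cannot_overflow_p (p_0, a) returns true, because
   p_0 - a merely recomputes "base" and cannot wrap; this is exactly the
   fact that iv_elimination_compare_lt below relies on.  */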
/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
   comparison with CAND.  NITER describes the number of iterations of
   the loops.  If successful, the comparison in COMP_P is altered accordingly.

   We aim to handle the following situation:

   sometype *base, *p;
   int a, b, i;

   i = a;
   p = p_0 = base + a;

   do
     {
       bla (*p);
       p++;
       i++;
     }
   while (i < b);

   Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
   We aim to optimize this to

   p = p_0 = base + a;
   do
     {
       bla (*p);
       p++;
     }
   while (p < p_0 - a + b);

   This preserves the correctness, since the pointer arithmetics does not
   overflow.  More precisely:

   1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
      overflow in computing it or the values of p.
   2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
      overflow.  To prove this, we use the fact that p_0 = base + a.  */

static bool
iv_elimination_compare_lt (struct ivopts_data *data,
                           struct iv_cand *cand, enum tree_code *comp_p,
                           struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  struct affine_tree_combination nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether candidate is increasing or decreasing
     in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
        {
          a = TREE_OPERAND (op0, 0);
          b = TREE_OPERAND (mbz, 1);
        }
      else
        return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
        {
          a = TREE_OPERAND (op1, 0);
          b = TREE_OPERAND (mbz, 0);
        }
      else
        return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, double_int_minus_one);
  aff_combination_scale (&tmpa, double_int_minus_one);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || tmpb.offset != double_int_one)
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
                        cand->iv->step,
                        fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}
4624 /* Check whether it is possible to express the condition in USE by comparison
4625 of candidate CAND. If so, store the value compared with to BOUND, and the
4626 comparison operator to COMP. */
4629 may_eliminate_iv (struct ivopts_data
*data
,
4630 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
4631 enum tree_code
*comp
)
4636 struct loop
*loop
= data
->current_loop
;
4638 struct tree_niter_desc
*desc
= NULL
;
4640 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
4643 /* For now works only for exits that dominate the loop latch.
4644 TODO: extend to other conditions inside loop body. */
4645 ex_bb
= gimple_bb (use
->stmt
);
4646 if (use
->stmt
!= last_stmt (ex_bb
)
4647 || gimple_code (use
->stmt
) != GIMPLE_COND
4648 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
4651 exit
= EDGE_SUCC (ex_bb
, 0);
4652 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4653 exit
= EDGE_SUCC (ex_bb
, 1);
4654 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4657 desc
= niter_for_exit (data
, exit
);
  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);
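
  /* For example (a sketch): a candidate with step 1 in an unsigned 32-bit
     type only repeats a value after 0xffffffff increments, so its period
     allows it to replace the exit test of any loop iterating fewer times
     than that; a larger power-of-two step shortens the period, since the
     candidate wraps around to a previously seen value sooner.  */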
  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
        {
          if (!tree_int_cst_lt (desc->niter, period))
            return false;
        }
      else
        {
          if (tree_int_cst_lt (period, desc->niter))
            return false;
        }
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      double_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
        max_niter += double_int_one;
      period_value = tree_to_double_int (period);
      if (max_niter.ugt (period_value))
        {
          /* See if we can take advantage of inferred loop bound
             information.  */
          if (data->loop_single_exit_p)
            {
              if (!max_loop_iterations (loop, &max_niter))
                return false;
              /* The loop bound is already adjusted by adding 1.  */
              if (max_niter.ugt (period_value))
                return false;
            }
          else
            return false;
        }
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = aff_combination_to_tree (&bnd);
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes, it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions hold, by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
           base the exit condition on it.  However, that is often too
           expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}
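
/* An illustrative sketch of the situation handled just above: in

     for (i = a; i != b; i++)
       body;

   where b may already equal a on entry, an eliminated exit test must not
   use != on the replacement variable; rewriting the condition to use <
   (or > for a decreasing candidate) in iv_elimination_compare_lt keeps
   it correct even when the loop runs zero times.  */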
/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied if it is used in the loop body and DATA->body_includes_call.  */

static int
parm_decl_cost (struct ivopts_data *data, tree bound)
{
  tree sbound = bound;
  STRIP_NOPS (sbound);

  if (TREE_CODE (sbound) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (sbound)
      && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
      && data->body_includes_call)
    return COSTS_N_INSNS (1);

  return 0;
}

/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost, bound_cost;
  bool ok;
  int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
                       ERROR_MARK, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      if (elim_cost.cost == 0)
        elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
        elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
         depends_on_elim will have 'base' and 'n' set, which implies
         that both 'base' and 'n' will be live during the loop.  More likely,
         'base + n' will be loop invariant, resulting in only one live value
         during the loop.  So in that case we clear depends_on_elim and set
         elim_inv_expr_id instead.  */
      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
        {
          elim_inv_expr_id = get_expr_id (data, bound);
          bitmap_clear (depends_on_elim);
        }
      /* The bound is a loop invariant, so it will be only computed
         once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
                              NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite!  */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
          || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
                                       &depends_on_express, NULL,
                                       &express_inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost.cost += bound_cost.cost;

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
      inv_expr_id = elim_inv_expr_id;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr_id = express_inv_expr_id;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}

/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
                           struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
                               &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}

/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest_before = NULL;
      struct iv_use *closest_after = NULL;
      if (cand->pos != IP_ORIGINAL)
        continue;

      for (j = 0; j < n_iv_uses (data); j++)
        {
          struct iv_use *use = iv_use (data, j);
          unsigned uid = gimple_uid (use->stmt);

          if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
            continue;

          if (uid < gimple_uid (cand->incremented_at)
              && (closest_before == NULL
                  || uid > gimple_uid (closest_before->stmt)))
            closest_before = use;

          if (uid > gimple_uid (cand->incremented_at)
              && (closest_after == NULL
                  || uid < gimple_uid (closest_after->stmt)))
            closest_after = use;
        }

      if (closest_before != NULL
          && autoinc_possible_for_pair (data, closest_before, cand))
        cand->ainc_use = closest_before;
      else if (closest_after != NULL
               && autoinc_possible_for_pair (data, closest_after, cand))
        cand->ainc_use = closest_after;
    }
}
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);
  bitmap_iterator bi;

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
        {
          for (j = 0; j < n_iv_cands (data); j++)
            {
              cand = iv_cand (data, j);
              determine_use_iv_cost (data, use, cand);
            }
        }
      else
        {
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              cand = iv_cand (data, j);
              if (!determine_use_iv_cost (data, use, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for which the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (use->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);

          fprintf (dump_file, "Use %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
          for (j = 0; j < use->n_map_members; j++)
            {
              if (!use->cost_map[j].cand
                  || infinite_cost_p (use->cost_map[j].cost))
                continue;

              fprintf (dump_file, "  %d\t%d\t%d\t",
                       use->cost_map[j].cand->id,
                       use->cost_map[j].cost.cost,
                       use->cost_map[j].cost.complexity);
              if (use->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              use->cost_map[j].depends_on, "","");
              if (use->cost_map[j].inv_expr_id != -1)
                fprintf (dump_file, " inv_expr:%d",
                         use->cost_map[j].inv_expr_id);
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}

/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  /* It will be exceptional that the iv register happens to be initialized with
     the proper value at no cost.  In general, there will at least be a regcopy
     or a save.  */
  if (cost_base.cost == 0)
    cost_base.cost = COSTS_N_INSNS (1);
  cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);
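
  /* Note that adjust_setup_cost amortizes the initialization cost over
     the expected number of iterations when optimizing for speed, so for
     a loop that rolls enough the per-iteration step cost dominates; e.g.
     (an illustration) a setup cost of 4 amortized over an average of 10
     iterations contributes 4 / 10 == 0 here.  */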
  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || !SSA_NAME_VAR (cand->var_before)
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}
/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
                                            data->body_includes_call);
}
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (virtual_operand_p (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}

/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}

/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
                                            ivs->n_regs
                                            + ivs->num_used_inv_expr);

  ivs->cost = cost;
}
/* Remove invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
        ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}

/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}

/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->n_cands++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
        {
          ivs->used_inv_expr[cp->inv_expr_id]++;
          if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
            ivs->num_used_inv_expr++;
        }
      iv_ca_recount_cost (data, ivs);
    }
}
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  All important candidates will be considered
   if IMPORTANT_CANDIDATES is true.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use, bool important_candidates)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  bitmap cands;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  cands = (important_candidates ? data->important_candidates : ivs->cands);
  EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
    {
      struct iv_cand *cand = iv_cand (data, i);

      cp = get_use_iv_cost (data, use, cand);

      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}

/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
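
/* The search below applies these deltas in a common pattern (a sketch):

     acost = iv_ca_extend (data, ivs, cand, &delta, &n_ivs, false);
       -- IVS is unchanged here; DELTA describes the proposed change
     iv_ca_delta_commit (data, ivs, delta, true);
       -- apply the change to IVS
     iv_ca_delta_commit (data, ivs, delta, false);
       -- revert it again
     iv_ca_delta_free (&delta);

   which lets the cost of a change be measured without copying the whole
   assignment.  */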
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}
/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = no_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
           ivs->cand_cost, ivs->cand_use_cost.cost,
           ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
        fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
                 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
        fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }
  fprintf (file, "\n\n");
}

/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with minimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use, false);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (!cp)
    {
      ivs->upto--;
      ivs->bad_uses--;
      iv_ca_add_use (data, ivs, use, true);
      best_cost = iv_ca_cost (ivs);
      cp = iv_ca_cand_for_use (ivs, use);
    }
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
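
  /* An illustrative example: in a loop such as

       for (i = 0; i < n; i++)
         a[i] = b[i];

     one generic biv can serve both array references, whereas seeding the
     set with separate address ivs for &a[i] and &b[i] would make it much
     harder for the local search to converge back to the one-iv
     solution.  */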
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
        continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
                               true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
          || stmt_code == MINUS_EXPR
          || stmt_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
        return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
        }
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref uses
   the iv value before the update, so the reordering transformation requires
   adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;


   directly propagating t over to (1) will introduce overlapping live range
   thus increase register pressure.  This peephole transforms it into:


   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
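
  /* A hypothetical illustration: if the candidate is a pointer iv with
     base_object 'p', its value at USE->stmt is passed as the hint so that
     create_mem_ref can build a reference of the form [p + offset] instead
     of treating the whole affine combination as an integer address
     computation.  */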
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}
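
/* For example (a sketch): an original exit test  if (i_1 != n_2)  on an
   eliminable iv may be rewritten by the code above into  if (p_3 < bound),
   where p_3 is the candidate's value at the exit statement and BOUND was
   computed by may_eliminate_iv.  */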
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  struct iv_cand *cand;
  struct iv_use *use;
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
              for (i = 0; i < n_iv_uses (data) && i < 64; i++)
                {
                  cand = iv_use (data, i)->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                              ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                       == TYPE_MODE (TREE_TYPE (info->iv->base))
                       ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                       ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));
              if (!comp)
                continue;

              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
                  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
                      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab.empty ();
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  data->inv_expr_tab.dispose ();
}
/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}