/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

   All the costs are defined in a machine-specific way, using the target
   hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
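
/* For example, in a loop such as

     for (i = 0; i < 100; i++)
       a[i] = 0;

   the address computation for a[i] is an interesting use of i, and a
   pointer candidate that starts at &a[0] and is incremented by
   sizeof (a[0]) on each iteration is one of the candidates considered
   for it.  If that candidate is selected, the store becomes an
   indirection through the pointer, the exit test can compare the pointer
   against &a[100], and i itself may be eliminated entirely.  */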
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in the step of
			   the biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table <iv_inv_expr_hasher> inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab.create (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */
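
/* For instance, for EXPR = &a[i] + 4 the result is &a, while for a
   non-pointer expression or an integer constant the result is NULL_TREE.  */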
static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree base_object = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower all address expressions except ones with DECL_P as operand.
     By doing this:
     1) More accurate cost can be computed for address expressions;
     2) Duplicate candidates won't be created for bases in different
	forms, like &a[0] and &a.  */
  STRIP_NOPS (base_object);
  if (TREE_CODE (base_object) == ADDR_EXPR
      && !DECL_P (TREE_OPERAND (base_object, 0)))
    {
      aff_tree comb;
      double_int size;
      base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
					     &comb, &size);
      gcc_assert (base_object != NULL_TREE);
      base_object = build_fold_addr_expr (base_object);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base_object);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */
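
/* For instance, for the loop-header phi

     i_1 = PHI <0 (preheader), i_2 (latch)>

   where the loop body computes i_2 = i_1 + 1, the step is 1.  */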
static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Clear the ssa name, if it was not reset by the caller, so that it
     is not shown in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */
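
/* For instance, for TOP = (i + 1) * 4 and BOT = i + 1 this stores 4 to
   MUL, and for TOP = 12 and BOT = 4 it stores 3.  For TOP = i * 4 and
   BOT = i * 2 it fails, since the recursion only matches structurally
   equal operands.  */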
static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
	return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      {
	tree field;

	if (!inside_addr)
	  return orig_expr;

	tmp = component_ref_field_offset (expr);
	field = TREE_OPERAND (expr, 1);
	if (top_compref
	    && cst_and_fits_in_hwi (tmp)
	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
	  {
	    HOST_WIDE_INT boffset, abs_off;

	    /* Strip the component reference completely.  */
	    op0 = TREE_OPERAND (expr, 0);
	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
	    if (boffset < 0)
	      abs_off = -abs_off;

	    *offset = off0 + int_cst_value (tmp) + abs_off;
	    return op0;
	  }
      }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
2215 /* Strips constant offsets from EXPR and stores their sum in OFFSET. */
2217 static tree
2218 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2220 HOST_WIDE_INT off;
2221 tree core = strip_offset_1 (expr, false, false, &off);
2222 *offset = off;
2223 return core;
2226 /* Returns a variant of TYPE that can be used as a base for different uses.
2227 We return an unsigned type with the same precision, which avoids problems
2228 with overflows. */
2230 static tree
2231 generic_type_for (tree type)
2233 if (POINTER_TYPE_P (type))
2234 return unsigned_type_for (type);
2236 if (TYPE_UNSIGNED (type))
2237 return type;
2239 return unsigned_type_for (type);
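/* Why an unsigned type: overflow of a signed induction variable is
   undefined behavior in C, so we could not freely rewrite increments in a
   signed type, whereas unsigned arithmetic is defined to wrap modulo
   2^precision.  An illustrative (not pass-internal) sketch: */
static unsigned int
wrapping_increment_sketch (unsigned int u)
{
  /* Well defined for every input: UINT_MAX + 1 wraps to 0.  The same
     increment on a signed int would be undefined at INT_MAX.  */
  return u + 1;
}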
2242 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2243 the bitmap in which we should store them. */
2245 static struct ivopts_data *fd_ivopts_data;
2246 static tree
2247 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2249 bitmap *depends_on = (bitmap *) data;
2250 struct version_info *info;
2252 if (TREE_CODE (*expr_p) != SSA_NAME)
2253 return NULL_TREE;
2254 info = name_info (fd_ivopts_data, *expr_p);
2256 if (!info->inv_id || info->has_nonlin_use)
2257 return NULL_TREE;
2259 if (!*depends_on)
2260 *depends_on = BITMAP_ALLOC (NULL);
2261 bitmap_set_bit (*depends_on, info->inv_id);
2263 return NULL_TREE;
2266 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2267 position to POS. If USE is not NULL, the candidate is set as related to
2268 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2269 replacement of the final value of the iv by a direct computation. */
2271 static struct iv_cand *
2272 add_candidate_1 (struct ivopts_data *data,
2273 tree base, tree step, bool important, enum iv_position pos,
2274 struct iv_use *use, gimple incremented_at)
2276 unsigned i;
2277 struct iv_cand *cand = NULL;
2278 tree type, orig_type;
2280 /* For non-original variables, make sure their values are computed in a type
2281 that does not invoke undefined behavior on overflows (since in general,
2282 we cannot prove that these induction variables are non-wrapping). */
2283 if (pos != IP_ORIGINAL)
2285 orig_type = TREE_TYPE (base);
2286 type = generic_type_for (orig_type);
2287 if (type != orig_type)
2289 base = fold_convert (type, base);
2290 step = fold_convert (type, step);
2294 for (i = 0; i < n_iv_cands (data); i++)
2296 cand = iv_cand (data, i);
2298 if (cand->pos != pos)
2299 continue;
2301 if (cand->incremented_at != incremented_at
2302 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2303 && cand->ainc_use != use))
2304 continue;
2306 if (!cand->iv)
2308 if (!base && !step)
2309 break;
2311 continue;
2314 if (!base && !step)
2315 continue;
2317 if (operand_equal_p (base, cand->iv->base, 0)
2318 && operand_equal_p (step, cand->iv->step, 0)
2319 && (TYPE_PRECISION (TREE_TYPE (base))
2320 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2321 break;
2324 if (i == n_iv_cands (data))
2326 cand = XCNEW (struct iv_cand);
2327 cand->id = i;
2329 if (!base && !step)
2330 cand->iv = NULL;
2331 else
2332 cand->iv = alloc_iv (base, step);
2334 cand->pos = pos;
2335 if (pos != IP_ORIGINAL && cand->iv)
2337 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2338 cand->var_after = cand->var_before;
2340 cand->important = important;
2341 cand->incremented_at = incremented_at;
2342 data->iv_candidates.safe_push (cand);
2344 if (step
2345 && TREE_CODE (step) != INTEGER_CST)
2347 fd_ivopts_data = data;
2348 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2351 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2352 cand->ainc_use = use;
2353 else
2354 cand->ainc_use = NULL;
2356 if (dump_file && (dump_flags & TDF_DETAILS))
2357 dump_cand (dump_file, cand);
2360 if (important && !cand->important)
2362 cand->important = true;
2363 if (dump_file && (dump_flags & TDF_DETAILS))
2364 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2367 if (use)
2369 bitmap_set_bit (use->related_cands, i);
2370 if (dump_file && (dump_flags & TDF_DETAILS))
2371 fprintf (dump_file, "Candidate %d is related to use %d\n",
2372 cand->id, use->id);
2375 return cand;
2378 /* Returns true if incrementing the induction variable at the end of the LOOP
2379 is allowed.
2381 The purpose is to avoid splitting the latch edge with a biv increment, thus
2382 creating a jump, possibly confusing other optimization passes and leaving
2383 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2384 is not available (so we do not have a better alternative), or if the latch
2385 edge is already nonempty. */
2387 static bool
2388 allow_ip_end_pos_p (struct loop *loop)
2390 if (!ip_normal_pos (loop))
2391 return true;
2393 if (!empty_block_p (ip_end_pos (loop)))
2394 return true;
2396 return false;
2399 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2400 Important field is set to IMPORTANT. */
2402 static void
2403 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2404 bool important, struct iv_use *use)
2406 basic_block use_bb = gimple_bb (use->stmt);
2407 enum machine_mode mem_mode;
2408 unsigned HOST_WIDE_INT cstepi;
2410 /* If we insert the increment in any position other than the standard
2411 ones, we must ensure that it is incremented once per iteration.
2412 It must not be in an inner nested loop, or one side of an if
2413 statement. */
2414 if (use_bb->loop_father != data->current_loop
2415 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2416 || stmt_could_throw_p (use->stmt)
2417 || !cst_and_fits_in_hwi (step))
2418 return;
2420 cstepi = int_cst_value (step);
2422 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2423 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2424 || USE_STORE_PRE_INCREMENT (mem_mode))
2425 && GET_MODE_SIZE (mem_mode) == cstepi)
2426 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2427 || USE_STORE_PRE_DECREMENT (mem_mode))
2428 && GET_MODE_SIZE (mem_mode) == -cstepi))
2430 enum tree_code code = MINUS_EXPR;
2431 tree new_base;
2432 tree new_step = step;
2434 if (POINTER_TYPE_P (TREE_TYPE (base)))
2436 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2437 code = POINTER_PLUS_EXPR;
2439 else
2440 new_step = fold_convert (TREE_TYPE (base), new_step);
2441 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2442 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2443 use->stmt);
2445 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2446 || USE_STORE_POST_INCREMENT (mem_mode))
2447 && GET_MODE_SIZE (mem_mode) == cstepi)
2448 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2449 || USE_STORE_POST_DECREMENT (mem_mode))
2450 && GET_MODE_SIZE (mem_mode) == -cstepi))
2452 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2453 use->stmt);
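/* The kind of source loop these candidates pay off for, as a hypothetical
   illustration (not code from this pass): on a target with post-increment
   addressing, each access below and its IV step can fuse into a single
   auto-incrementing memory operand.  */
static void
copy_bytes_sketch (char *dst, const char *src, unsigned long n)
{
  while (n--)
    *dst++ = *src++;  /* Step == access size, matching AINC_POST_INC.  */
}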
2457 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT.
2458 If USE is not NULL, the candidate is set as related to it. The
2459 candidate computation is scheduled at all available positions. */
2461 static void
2462 add_candidate (struct ivopts_data *data,
2463 tree base, tree step, bool important, struct iv_use *use)
2465 if (ip_normal_pos (data->current_loop))
2466 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2467 if (ip_end_pos (data->current_loop)
2468 && allow_ip_end_pos_p (data->current_loop))
2469 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2471 if (use != NULL && use->type == USE_ADDRESS)
2472 add_autoinc_candidates (data, base, step, important, use);
2475 /* Adds standard iv candidates. */
2477 static void
2478 add_standard_iv_candidates (struct ivopts_data *data)
2480 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2482 /* The same for the wider `long' type if it is still fast enough. */
2483 if (TYPE_PRECISION
2484 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2485 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2486 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2487 build_int_cst (long_integer_type_node, 1), true, NULL);
2489 /* And likewise for `long long' if it is wider still and fast enough. */
2490 if (TYPE_PRECISION
2491 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2492 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2493 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2494 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2498 /* Adds candidates based on the old induction variable IV. */
2500 static void
2501 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2503 gimple phi;
2504 tree def;
2505 struct iv_cand *cand;
2507 add_candidate (data, iv->base, iv->step, true, NULL);
2509 /* The same, but with initial value zero. */
2510 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2511 add_candidate (data, size_int (0), iv->step, true, NULL);
2512 else
2513 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2514 iv->step, true, NULL);
2516 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2517 if (gimple_code (phi) == GIMPLE_PHI)
2519 /* Additionally record the possibility of leaving the original iv
2520 untouched. */
2521 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2522 cand = add_candidate_1 (data,
2523 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2524 SSA_NAME_DEF_STMT (def));
2525 cand->var_before = iv->ssa_name;
2526 cand->var_after = def;
2530 /* Adds candidates based on the old induction variables. */
2532 static void
2533 add_old_ivs_candidates (struct ivopts_data *data)
2535 unsigned i;
2536 struct iv *iv;
2537 bitmap_iterator bi;
2539 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2541 iv = ver_info (data, i)->iv;
2542 if (iv && iv->biv_p && !integer_zerop (iv->step))
2543 add_old_iv_candidates (data, iv);
2547 /* Adds candidates based on the value of the induction variable IV and USE. */
2549 static void
2550 add_iv_value_candidates (struct ivopts_data *data,
2551 struct iv *iv, struct iv_use *use)
2553 unsigned HOST_WIDE_INT offset;
2554 tree base;
2555 tree basetype;
2557 add_candidate (data, iv->base, iv->step, false, use);
2559 /* The same, but with initial value zero. Make such a variable important,
2560 since it is generic enough that possibly many uses may be based
2561 on it. */
2562 basetype = TREE_TYPE (iv->base);
2563 if (POINTER_TYPE_P (basetype))
2564 basetype = sizetype;
2565 add_candidate (data, build_int_cst (basetype, 0),
2566 iv->step, true, use);
2568 /* Third, try removing the constant offset. Make sure to even
2569 add a candidate for &a[0] vs. (T *)&a. */
2570 base = strip_offset (iv->base, &offset);
2571 if (offset
2572 || base != iv->base)
2573 add_candidate (data, base, iv->step, false, use);
2576 /* Adds candidates based on the uses. */
2578 static void
2579 add_derived_ivs_candidates (struct ivopts_data *data)
2581 unsigned i;
2583 for (i = 0; i < n_iv_uses (data); i++)
2585 struct iv_use *use = iv_use (data, i);
2587 if (!use)
2588 continue;
2590 switch (use->type)
2592 case USE_NONLINEAR_EXPR:
2593 case USE_COMPARE:
2594 case USE_ADDRESS:
2595 /* Just add the ivs based on the value of the iv used here. */
2596 add_iv_value_candidates (data, use->iv, use);
2597 break;
2599 default:
2600 gcc_unreachable ();
2605 /* Record important candidates and add them to related_cands bitmaps
2606 if needed. */
2608 static void
2609 record_important_candidates (struct ivopts_data *data)
2611 unsigned i;
2612 struct iv_use *use;
2614 for (i = 0; i < n_iv_cands (data); i++)
2616 struct iv_cand *cand = iv_cand (data, i);
2618 if (cand->important)
2619 bitmap_set_bit (data->important_candidates, i);
2622 data->consider_all_candidates = (n_iv_cands (data)
2623 <= CONSIDER_ALL_CANDIDATES_BOUND);
2625 if (data->consider_all_candidates)
2627 /* We will not need "related_cands" bitmaps in this case,
2628 so release them to decrease peak memory consumption. */
2629 for (i = 0; i < n_iv_uses (data); i++)
2631 use = iv_use (data, i);
2632 BITMAP_FREE (use->related_cands);
2635 else
2637 /* Add important candidates to the related_cands bitmaps. */
2638 for (i = 0; i < n_iv_uses (data); i++)
2639 bitmap_ior_into (iv_use (data, i)->related_cands,
2640 data->important_candidates);
2644 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2645 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2646 we allocate a simple list for every use. */
2648 static void
2649 alloc_use_cost_map (struct ivopts_data *data)
2651 unsigned i, size, s;
2653 for (i = 0; i < n_iv_uses (data); i++)
2655 struct iv_use *use = iv_use (data, i);
2657 if (data->consider_all_candidates)
2658 size = n_iv_cands (data);
2659 else
2661 s = bitmap_count_bits (use->related_cands);
2663 /* Round up to a power of two, so that computing the modulo by it is fast. */
2664 size = s ? (1 << ceil_log2 (s)) : 1;
2667 use->n_map_members = size;
2668 use->cost_map = XCNEWVEC (struct cost_pair, size);
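/* Why SIZE is rounded up to a power of two: for such a SIZE,
   ID % SIZE == ID & (SIZE - 1), which is how set_use_iv_cost and
   get_use_iv_cost below pick the starting hash slot without a division.
   A hypothetical sketch: */
static unsigned
hash_slot_sketch (unsigned id, unsigned size)
{
  /* For size == 8, id % 8 == id & 7: a single AND instruction.  */
  return id & (size - 1);
}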
2672 /* Returns a description of the computation cost of an expression whose
2673 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2675 static comp_cost
2676 new_cost (unsigned runtime, unsigned complexity)
2678 comp_cost cost;
2680 cost.cost = runtime;
2681 cost.complexity = complexity;
2683 return cost;
2686 /* Adds costs COST1 and COST2. */
2688 static comp_cost
2689 add_costs (comp_cost cost1, comp_cost cost2)
2691 cost1.cost += cost2.cost;
2692 cost1.complexity += cost2.complexity;
2694 return cost1;
2696 /* Subtracts cost COST2 from cost COST1. */
2698 static comp_cost
2699 sub_costs (comp_cost cost1, comp_cost cost2)
2701 cost1.cost -= cost2.cost;
2702 cost1.complexity -= cost2.complexity;
2704 return cost1;
2707 /* Returns a negative number if COST1 < COST2, a positive number if
2708 COST1 > COST2, and 0 if COST1 = COST2. */
2710 static int
2711 compare_costs (comp_cost cost1, comp_cost cost2)
2713 if (cost1.cost == cost2.cost)
2714 return cost1.complexity - cost2.complexity;
2716 return cost1.cost - cost2.cost;
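/* Usage sketch with made-up numbers: costs compare lexicographically,
   runtime first and complexity as the tie break.  Here the runtimes are
   equal and A is the less complex, so the result is negative.  */
static int
compare_costs_sketch (void)
{
  comp_cost a = new_cost (4, 2);
  comp_cost b = new_cost (4, 3);
  return compare_costs (a, b);  /* == 2 - 3, i.e. A is cheaper.  */
}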
2719 /* Returns true if COST is infinite. */
2721 static bool
2722 infinite_cost_p (comp_cost cost)
2724 return cost.cost == INFTY;
2727 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2728 on invariants DEPENDS_ON and that the value used in expressing it
2729 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2731 static void
2732 set_use_iv_cost (struct ivopts_data *data,
2733 struct iv_use *use, struct iv_cand *cand,
2734 comp_cost cost, bitmap depends_on, tree value,
2735 enum tree_code comp, int inv_expr_id)
2737 unsigned i, s;
2739 if (infinite_cost_p (cost))
2741 BITMAP_FREE (depends_on);
2742 return;
2745 if (data->consider_all_candidates)
2747 use->cost_map[cand->id].cand = cand;
2748 use->cost_map[cand->id].cost = cost;
2749 use->cost_map[cand->id].depends_on = depends_on;
2750 use->cost_map[cand->id].value = value;
2751 use->cost_map[cand->id].comp = comp;
2752 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2753 return;
2756 /* n_map_members is a power of two, so this computes modulo. */
2757 s = cand->id & (use->n_map_members - 1);
2758 for (i = s; i < use->n_map_members; i++)
2759 if (!use->cost_map[i].cand)
2760 goto found;
2761 for (i = 0; i < s; i++)
2762 if (!use->cost_map[i].cand)
2763 goto found;
2765 gcc_unreachable ();
2767 found:
2768 use->cost_map[i].cand = cand;
2769 use->cost_map[i].cost = cost;
2770 use->cost_map[i].depends_on = depends_on;
2771 use->cost_map[i].value = value;
2772 use->cost_map[i].comp = comp;
2773 use->cost_map[i].inv_expr_id = inv_expr_id;
2776 /* Gets cost of (USE, CANDIDATE) pair. */
2778 static struct cost_pair *
2779 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2780 struct iv_cand *cand)
2782 unsigned i, s;
2783 struct cost_pair *ret;
2785 if (!cand)
2786 return NULL;
2788 if (data->consider_all_candidates)
2790 ret = use->cost_map + cand->id;
2791 if (!ret->cand)
2792 return NULL;
2794 return ret;
2797 /* n_map_members is a power of two, so this computes modulo. */
2798 s = cand->id & (use->n_map_members - 1);
2799 for (i = s; i < use->n_map_members; i++)
2800 if (use->cost_map[i].cand == cand)
2801 return use->cost_map + i;
2802 else if (use->cost_map[i].cand == NULL)
2803 return NULL;
2804 for (i = 0; i < s; i++)
2805 if (use->cost_map[i].cand == cand)
2806 return use->cost_map + i;
2807 else if (use->cost_map[i].cand == NULL)
2808 return NULL;
2810 return NULL;
2813 /* Returns an estimate of the cost of computing SEQ. */
2815 static unsigned
2816 seq_cost (rtx seq, bool speed)
2818 unsigned cost = 0;
2819 rtx set;
2821 for (; seq; seq = NEXT_INSN (seq))
2823 set = single_set (seq);
2824 if (set)
2825 cost += set_src_cost (SET_SRC (set), speed);
2826 else
2827 cost++;
2830 return cost;
2833 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2834 static rtx
2835 produce_memory_decl_rtl (tree obj, int *regno)
2837 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2838 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2839 rtx x;
2841 gcc_assert (obj);
2842 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2844 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2845 x = gen_rtx_SYMBOL_REF (address_mode, name);
2846 SET_SYMBOL_REF_DECL (x, obj);
2847 x = gen_rtx_MEM (DECL_MODE (obj), x);
2848 set_mem_addr_space (x, as);
2849 targetm.encode_section_info (obj, x, true);
2851 else
2853 x = gen_raw_REG (address_mode, (*regno)++);
2854 x = gen_rtx_MEM (DECL_MODE (obj), x);
2855 set_mem_addr_space (x, as);
2858 return x;
2861 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2862 walk_tree. DATA contains the current fake register number. */
2864 static tree
2865 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2867 tree obj = NULL_TREE;
2868 rtx x = NULL_RTX;
2869 int *regno = (int *) data;
2871 switch (TREE_CODE (*expr_p))
2873 case ADDR_EXPR:
2874 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2875 handled_component_p (*expr_p);
2876 expr_p = &TREE_OPERAND (*expr_p, 0))
2877 continue;
2878 obj = *expr_p;
2879 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2880 x = produce_memory_decl_rtl (obj, regno);
2881 break;
2883 case SSA_NAME:
2884 *ws = 0;
2885 obj = SSA_NAME_VAR (*expr_p);
2886 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2887 if (!obj)
2888 return NULL_TREE;
2889 if (!DECL_RTL_SET_P (obj))
2890 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2891 break;
2893 case VAR_DECL:
2894 case PARM_DECL:
2895 case RESULT_DECL:
2896 *ws = 0;
2897 obj = *expr_p;
2899 if (DECL_RTL_SET_P (obj))
2900 break;
2902 if (DECL_MODE (obj) == BLKmode)
2903 x = produce_memory_decl_rtl (obj, regno);
2904 else
2905 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2907 break;
2909 default:
2910 break;
2913 if (x)
2915 decl_rtl_to_reset.safe_push (obj);
2916 SET_DECL_RTL (obj, x);
2919 return NULL_TREE;
2922 /* Determines cost of the computation of EXPR. */
2924 static unsigned
2925 computation_cost (tree expr, bool speed)
2927 rtx seq, rslt;
2928 tree type = TREE_TYPE (expr);
2929 unsigned cost;
2930 /* Avoid using hard regs in ways which may be unsupported. */
2931 int regno = LAST_VIRTUAL_REGISTER + 1;
2932 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2933 enum node_frequency real_frequency = node->frequency;
2935 node->frequency = NODE_FREQUENCY_NORMAL;
2936 crtl->maybe_hot_insn_p = speed;
2937 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2938 start_sequence ();
2939 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2940 seq = get_insns ();
2941 end_sequence ();
2942 default_rtl_profile ();
2943 node->frequency = real_frequency;
2945 cost = seq_cost (seq, speed);
2946 if (MEM_P (rslt))
2947 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2948 TYPE_ADDR_SPACE (type), speed);
2949 else if (!REG_P (rslt))
2950 cost += set_src_cost (rslt, speed);
2952 return cost;
2955 /* Returns the variable containing the value of candidate CAND at statement STMT. */
2957 static tree
2958 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2960 if (stmt_after_increment (loop, cand, stmt))
2961 return cand->var_after;
2962 else
2963 return cand->var_before;
2966 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2967 same precision that is at least as wide as the precision of TYPE, stores
2968 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2969 type of A and B. */
2971 static tree
2972 determine_common_wider_type (tree *a, tree *b)
2974 tree wider_type = NULL;
2975 tree suba, subb;
2976 tree atype = TREE_TYPE (*a);
2978 if (CONVERT_EXPR_P (*a))
2980 suba = TREE_OPERAND (*a, 0);
2981 wider_type = TREE_TYPE (suba);
2982 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2983 return atype;
2985 else
2986 return atype;
2988 if (CONVERT_EXPR_P (*b))
2990 subb = TREE_OPERAND (*b, 0);
2991 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2992 return atype;
2994 else
2995 return atype;
2997 *a = suba;
2998 *b = subb;
2999 return wider_type;
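/* The folding win this enables, as a hypothetical sketch with `long' as
   the wider type: unsigned truncation commutes with subtraction, so when
   both values were truncated from a common wider type, the difference may
   be computed in the wider type and truncated just once.  */
static unsigned short
wider_difference_sketch (long ba, long bb)
{
  /* Equals (unsigned short) ba - (unsigned short) bb modulo 2^16, hence
     subtracting before truncating is safe.  */
  return (unsigned short) (ba - bb);
}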
3002 /* Determines the expression by which USE is expressed from induction variable
3003 CAND at statement AT in LOOP. The expression is stored in a decomposed
3004 form in AFF. Returns false if USE cannot be expressed using CAND. */
3006 static bool
3007 get_computation_aff (struct loop *loop,
3008 struct iv_use *use, struct iv_cand *cand, gimple at,
3009 struct affine_tree_combination *aff)
3011 tree ubase = use->iv->base;
3012 tree ustep = use->iv->step;
3013 tree cbase = cand->iv->base;
3014 tree cstep = cand->iv->step, cstep_common;
3015 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3016 tree common_type, var;
3017 tree uutype;
3018 aff_tree cbase_aff, var_aff;
3019 double_int rat;
3021 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3023 /* We do not have enough precision to express the values of USE. */
3024 return false;
3027 var = var_at_stmt (loop, cand, at);
3028 uutype = unsigned_type_for (utype);
3030 /* If the conversion is not a no-op, perform it. */
3031 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3033 cstep = fold_convert (uutype, cstep);
3034 cbase = fold_convert (uutype, cbase);
3035 var = fold_convert (uutype, var);
3038 if (!constant_multiple_of (ustep, cstep, &rat))
3039 return false;
3041 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3042 type, we achieve better folding by computing their difference in this
3043 wider type, and cast the result to UUTYPE. We do not need to worry about
3044 overflows, as all the arithmetic will in the end be performed in UUTYPE
3045 anyway. */
3046 common_type = determine_common_wider_type (&ubase, &cbase);
3048 /* use = ubase - ratio * cbase + ratio * var. */
3049 tree_to_aff_combination (ubase, common_type, aff);
3050 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3051 tree_to_aff_combination (var, uutype, &var_aff);
3053 /* We need to shift the value if we are after the increment. */
3054 if (stmt_after_increment (loop, cand, at))
3056 aff_tree cstep_aff;
3058 if (common_type != uutype)
3059 cstep_common = fold_convert (common_type, cstep);
3060 else
3061 cstep_common = cstep;
3063 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3064 aff_combination_add (&cbase_aff, &cstep_aff);
3067 aff_combination_scale (&cbase_aff, -rat);
3068 aff_combination_add (aff, &cbase_aff);
3069 if (common_type != uutype)
3070 aff_combination_convert (aff, uutype);
3072 aff_combination_scale (&var_aff, rat);
3073 aff_combination_add (aff, &var_aff);
3075 return true;
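/* A worked instance of "use = ubase - ratio * cbase + ratio * var" with
   made-up numbers: ubase = 10, ustep = 8, cbase = 2, cstep = 4, hence
   ratio = ustep / cstep = 2.  At iteration I the candidate holds
   var = cbase + I * cstep, and the identity reproduces the use value
   ubase + I * ustep.  Illustrative only: */
static long
computation_aff_sketch (long i)
{
  long ubase = 10, cbase = 2, cstep = 4, ratio = 2;
  long var = cbase + i * cstep;                /* Candidate value.  */
  return ubase - ratio * cbase + ratio * var;  /* == 10 + 8 * i.  */
}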
3078 /* Return the type of USE. */
3080 static tree
3081 get_use_type (struct iv_use *use)
3083 tree base_type = TREE_TYPE (use->iv->base);
3084 tree type;
3086 if (use->type == USE_ADDRESS)
3088 /* The base_type may be a void pointer. Create a pointer type based on
3089 the mem_ref instead. */
3090 type = build_pointer_type (TREE_TYPE (*use->op_p));
3091 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3092 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3094 else
3095 type = base_type;
3097 return type;
3100 /* Determines the expression by which USE is expressed from induction variable
3101 CAND at statement AT in LOOP. The computation is unshared. */
3103 static tree
3104 get_computation_at (struct loop *loop,
3105 struct iv_use *use, struct iv_cand *cand, gimple at)
3107 aff_tree aff;
3108 tree type = get_use_type (use);
3110 if (!get_computation_aff (loop, use, cand, at, &aff))
3111 return NULL_TREE;
3112 unshare_aff_combination (&aff);
3113 return fold_convert (type, aff_combination_to_tree (&aff));
3116 /* Determines the expression by which USE is expressed from induction variable
3117 CAND in LOOP. The computation is unshared. */
3119 static tree
3120 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3122 return get_computation_at (loop, use, cand, use->stmt);
3125 /* Adjust the cost COST for being in loop setup rather than loop body.
3126 If we're optimizing for space, the loop setup overhead is constant;
3127 if we're optimizing for speed, amortize it over the per-iteration cost. */
3128 static unsigned
3129 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3131 if (cost == INFTY)
3132 return cost;
3133 else if (optimize_loop_for_speed_p (data->current_loop))
3134 return cost / avg_loop_niter (data->current_loop);
3135 else
3136 return cost;
3139 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3140 validity for a memory reference accessing memory of mode MODE in
3141 address space AS. */
3144 bool
3145 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3146 addr_space_t as)
3148 #define MAX_RATIO 128
3149 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3150 static vec<sbitmap> valid_mult_list;
3151 sbitmap valid_mult;
3153 if (data_index >= valid_mult_list.length ())
3154 valid_mult_list.safe_grow_cleared (data_index + 1);
3156 valid_mult = valid_mult_list[data_index];
3157 if (!valid_mult)
3159 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3160 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3161 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3162 rtx addr, scaled;
3163 HOST_WIDE_INT i;
3165 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3166 bitmap_clear (valid_mult);
3167 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3168 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3169 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3171 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3172 if (memory_address_addr_space_p (mode, addr, as)
3173 || memory_address_addr_space_p (mode, scaled, as))
3174 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3177 if (dump_file && (dump_flags & TDF_DETAILS))
3179 fprintf (dump_file, " allowed multipliers:");
3180 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3181 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3182 fprintf (dump_file, " %d", (int) i);
3183 fprintf (dump_file, "\n");
3184 fprintf (dump_file, "\n");
3187 valid_mult_list[data_index] = valid_mult;
3190 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3191 return false;
3193 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
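/* A hedged picture of the answers this caches: on an x86-like target whose
   addressing supports base + index * {1, 2, 4, 8}, the probing loop above
   would set exactly those four bits.  Purely illustrative stand-in: */
static bool
allowed_mult_sketch (HOST_WIDE_INT ratio)
{
  return ratio == 1 || ratio == 2 || ratio == 4 || ratio == 8;
}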
3196 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3197 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3198 variable is omitted. Compute the cost for a memory reference that accesses
3199 a memory location of mode MEM_MODE in address space AS.
3201 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3202 size of MEM_MODE / RATIO) is available. To make this determination, we
3203 look at the size of the increment to be made, which is given in CSTEP.
3204 CSTEP may be zero if the step is unknown.
3205 STMT_AFTER_INC is true iff the statement we're looking at is after the
3206 increment of the original biv.
3208 TODO -- there must be some better way. This all is quite crude. */
3210 enum ainc_type
3212 AINC_PRE_INC, /* Pre increment. */
3213 AINC_PRE_DEC, /* Pre decrement. */
3214 AINC_POST_INC, /* Post increment. */
3215 AINC_POST_DEC, /* Post decrement. */
3216 AINC_NONE /* Also the number of auto increment types. */
3219 typedef struct address_cost_data_s
3221 HOST_WIDE_INT min_offset, max_offset;
3222 unsigned costs[2][2][2][2];
3223 unsigned ainc_costs[AINC_NONE];
3224 } *address_cost_data;
3227 static comp_cost
3228 get_address_cost (bool symbol_present, bool var_present,
3229 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3230 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3231 addr_space_t as, bool speed,
3232 bool stmt_after_inc, bool *may_autoinc)
3234 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3235 static vec<address_cost_data> address_cost_data_list;
3236 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3237 address_cost_data data;
3238 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3239 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3240 unsigned cost, acost, complexity;
3241 enum ainc_type autoinc_type;
3242 bool offset_p, ratio_p, autoinc;
3243 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3244 unsigned HOST_WIDE_INT mask;
3245 unsigned bits;
3247 if (data_index >= address_cost_data_list.length ())
3248 address_cost_data_list.safe_grow_cleared (data_index + 1);
3250 data = address_cost_data_list[data_index];
3251 if (!data)
3253 HOST_WIDE_INT i;
3254 HOST_WIDE_INT rat, off = 0;
3255 int old_cse_not_expected, width;
3256 unsigned sym_p, var_p, off_p, rat_p, add_c;
3257 rtx seq, addr, base;
3258 rtx reg0, reg1;
3260 data = (address_cost_data) xcalloc (1, sizeof (*data));
3262 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3264 width = GET_MODE_BITSIZE (address_mode) - 1;
3265 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3266 width = HOST_BITS_PER_WIDE_INT - 1;
3267 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3269 for (i = width; i >= 0; i--)
3271 off = -((unsigned HOST_WIDE_INT) 1 << i);
3272 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3273 if (memory_address_addr_space_p (mem_mode, addr, as))
3274 break;
3276 data->min_offset = (i == -1 ? 0 : off);
3278 for (i = width; i >= 0; i--)
3280 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3281 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3282 if (memory_address_addr_space_p (mem_mode, addr, as))
3283 break;
3285 if (i == -1)
3286 off = 0;
3287 data->max_offset = off;
3289 if (dump_file && (dump_flags & TDF_DETAILS))
3291 fprintf (dump_file, "get_address_cost:\n");
3292 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3293 GET_MODE_NAME (mem_mode),
3294 data->min_offset);
3295 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3296 GET_MODE_NAME (mem_mode),
3297 data->max_offset);
3300 rat = 1;
3301 for (i = 2; i <= MAX_RATIO; i++)
3302 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3304 rat = i;
3305 break;
3308 /* Compute the cost of various addressing modes. */
3309 acost = 0;
3310 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3311 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3313 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3314 || USE_STORE_PRE_DECREMENT (mem_mode))
3316 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3317 has_predec[mem_mode]
3318 = memory_address_addr_space_p (mem_mode, addr, as);
3320 if (has_predec[mem_mode])
3321 data->ainc_costs[AINC_PRE_DEC]
3322 = address_cost (addr, mem_mode, as, speed);
3324 if (USE_LOAD_POST_DECREMENT (mem_mode)
3325 || USE_STORE_POST_DECREMENT (mem_mode))
3327 addr = gen_rtx_POST_DEC (address_mode, reg0);
3328 has_postdec[mem_mode]
3329 = memory_address_addr_space_p (mem_mode, addr, as);
3331 if (has_postdec[mem_mode])
3332 data->ainc_costs[AINC_POST_DEC]
3333 = address_cost (addr, mem_mode, as, speed);
3335 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3336 || USE_STORE_PRE_INCREMENT (mem_mode))
3338 addr = gen_rtx_PRE_INC (address_mode, reg0);
3339 has_preinc[mem_mode]
3340 = memory_address_addr_space_p (mem_mode, addr, as);
3342 if (has_preinc[mem_mode])
3343 data->ainc_costs[AINC_PRE_INC]
3344 = address_cost (addr, mem_mode, as, speed);
3346 if (USE_LOAD_POST_INCREMENT (mem_mode)
3347 || USE_STORE_POST_INCREMENT (mem_mode))
3349 addr = gen_rtx_POST_INC (address_mode, reg0);
3350 has_postinc[mem_mode]
3351 = memory_address_addr_space_p (mem_mode, addr, as);
3353 if (has_postinc[mem_mode])
3354 data->ainc_costs[AINC_POST_INC]
3355 = address_cost (addr, mem_mode, as, speed);
3357 for (i = 0; i < 16; i++)
3359 sym_p = i & 1;
3360 var_p = (i >> 1) & 1;
3361 off_p = (i >> 2) & 1;
3362 rat_p = (i >> 3) & 1;
3364 addr = reg0;
3365 if (rat_p)
3366 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3367 gen_int_mode (rat, address_mode));
3369 if (var_p)
3370 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3372 if (sym_p)
3374 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3375 /* ??? We can run into trouble with some backends by presenting
3376 them with symbols which haven't been properly passed through
3377 targetm.encode_section_info. By setting the local bit, we
3378 enhance the probability of things working. */
3379 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3381 if (off_p)
3382 base = gen_rtx_fmt_e (CONST, address_mode,
3383 gen_rtx_fmt_ee
3384 (PLUS, address_mode, base,
3385 gen_int_mode (off, address_mode)));
3387 else if (off_p)
3388 base = gen_int_mode (off, address_mode);
3389 else
3390 base = NULL_RTX;
3392 if (base)
3393 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3395 start_sequence ();
3396 /* To avoid splitting addressing modes, pretend that no cse will
3397 follow. */
3398 old_cse_not_expected = cse_not_expected;
3399 cse_not_expected = true;
3400 addr = memory_address_addr_space (mem_mode, addr, as);
3401 cse_not_expected = old_cse_not_expected;
3402 seq = get_insns ();
3403 end_sequence ();
3405 acost = seq_cost (seq, speed);
3406 acost += address_cost (addr, mem_mode, as, speed);
3408 if (!acost)
3409 acost = 1;
3410 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3413 /* On some targets, it is quite expensive to load a symbol into a register,
3414 which makes addresses that contain symbols look much more expensive.
3415 However, the symbol will have to be loaded in any case before the
3416 loop (and quite likely we have it in a register already), so it does not
3417 make much sense to penalize them too heavily. So make some final
3418 tweaks for the SYMBOL_PRESENT modes:
3420 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3421 var is cheaper, use this mode with small penalty.
3422 If VAR_PRESENT is true, try whether the mode with
3423 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3424 if this is the case, use it. */
3425 add_c = add_cost (speed, address_mode);
3426 for (i = 0; i < 8; i++)
3428 var_p = i & 1;
3429 off_p = (i >> 1) & 1;
3430 rat_p = (i >> 2) & 1;
3432 acost = data->costs[0][1][off_p][rat_p] + 1;
3433 if (var_p)
3434 acost += add_c;
3436 if (acost < data->costs[1][var_p][off_p][rat_p])
3437 data->costs[1][var_p][off_p][rat_p] = acost;
3440 if (dump_file && (dump_flags & TDF_DETAILS))
3442 fprintf (dump_file, "Address costs:\n");
3444 for (i = 0; i < 16; i++)
3446 sym_p = i & 1;
3447 var_p = (i >> 1) & 1;
3448 off_p = (i >> 2) & 1;
3449 rat_p = (i >> 3) & 1;
3451 fprintf (dump_file, " ");
3452 if (sym_p)
3453 fprintf (dump_file, "sym + ");
3454 if (var_p)
3455 fprintf (dump_file, "var + ");
3456 if (off_p)
3457 fprintf (dump_file, "cst + ");
3458 if (rat_p)
3459 fprintf (dump_file, "rat * ");
3461 acost = data->costs[sym_p][var_p][off_p][rat_p];
3462 fprintf (dump_file, "index costs %d\n", acost);
3464 if (has_predec[mem_mode] || has_postdec[mem_mode]
3465 || has_preinc[mem_mode] || has_postinc[mem_mode])
3466 fprintf (dump_file, " May include autoinc/dec\n");
3467 fprintf (dump_file, "\n");
3470 address_cost_data_list[data_index] = data;
3473 bits = GET_MODE_BITSIZE (address_mode);
3474 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3475 offset &= mask;
3476 if ((offset >> (bits - 1) & 1))
3477 offset |= ~mask;
3478 s_offset = offset;
3480 autoinc = false;
3481 autoinc_type = AINC_NONE;
3482 msize = GET_MODE_SIZE (mem_mode);
3483 autoinc_offset = offset;
3484 if (stmt_after_inc)
3485 autoinc_offset += ratio * cstep;
3486 if (symbol_present || var_present || ratio != 1)
3487 autoinc = false;
3488 else
3490 if (has_postinc[mem_mode] && autoinc_offset == 0
3491 && msize == cstep)
3492 autoinc_type = AINC_POST_INC;
3493 else if (has_postdec[mem_mode] && autoinc_offset == 0
3494 && msize == -cstep)
3495 autoinc_type = AINC_POST_DEC;
3496 else if (has_preinc[mem_mode] && autoinc_offset == msize
3497 && msize == cstep)
3498 autoinc_type = AINC_PRE_INC;
3499 else if (has_predec[mem_mode] && autoinc_offset == -msize
3500 && msize == -cstep)
3501 autoinc_type = AINC_PRE_DEC;
3503 if (autoinc_type != AINC_NONE)
3504 autoinc = true;
3507 cost = 0;
3508 offset_p = (s_offset != 0
3509 && data->min_offset <= s_offset
3510 && s_offset <= data->max_offset);
3511 ratio_p = (ratio != 1
3512 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3514 if (ratio != 1 && !ratio_p)
3515 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3517 if (s_offset && !offset_p && !symbol_present)
3518 cost += add_cost (speed, address_mode);
3520 if (may_autoinc)
3521 *may_autoinc = autoinc;
3522 if (autoinc)
3523 acost = data->ainc_costs[autoinc_type];
3524 else
3525 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3526 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3527 return new_cost (cost + acost, complexity);
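/* The mask arithmetic near the top of this function sign-extends OFFSET
   from the width of ADDRESS_MODE to a full HOST_WIDE_INT.  A hypothetical
   sketch for a 16-bit address mode (assuming the usual two's complement
   conversion to a signed type): */
static long
sign_extend_sketch (unsigned long offset)
{
  unsigned int bits = 16;
  unsigned long mask = ~(~0UL << (bits - 1) << 1);  /* 0xffff.  */
  offset &= mask;
  if ((offset >> (bits - 1)) & 1)
    offset |= ~mask;       /* Propagate the sign bit upwards.  */
  return (long) offset;    /* E.g. 0xfffe becomes -2.  */
}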
3530 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3531 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3532 calculating the operands of EXPR. Returns true if successful, and returns
3533 the cost in COST. */
3535 static bool
3536 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3537 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3539 comp_cost res;
3540 tree op1 = TREE_OPERAND (expr, 1);
3541 tree cst = TREE_OPERAND (mult, 1);
3542 tree multop = TREE_OPERAND (mult, 0);
3543 int m = exact_log2 (int_cst_value (cst));
3544 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3545 int sa_cost;
3546 bool equal_p = false;
3548 if (!(m >= 0 && m < maxm))
3549 return false;
3551 if (operand_equal_p (op1, mult, 0))
3552 equal_p = true;
3554 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3555 ? shiftadd_cost (speed, mode, m)
3556 : (equal_p
3557 ? shiftsub1_cost (speed, mode, m)
3558 : shiftsub0_cost (speed, mode, m)));
3559 res = new_cost (sa_cost, 0);
3560 res = add_costs (res, equal_p ? cost0 : cost1);
3562 STRIP_NOPS (multop);
3563 if (!is_gimple_val (multop))
3564 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3566 *cost = res;
3567 return true;
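/* The strength reduction whose cost this models, as an illustrative
   fragment: a multiplication by a power of two folded into a shift that
   feeds an add (here m == 3, since 8 == 1 << 3).  */
static long
shiftadd_sketch (long x, long y)
{
  return y + (x << 3);  /* Shift-add form of y + x * 8.  */
}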
3570 /* Estimates cost of forcing expression EXPR into a variable. */
3572 static comp_cost
3573 force_expr_to_var_cost (tree expr, bool speed)
3575 static bool costs_initialized = false;
3576 static unsigned integer_cost [2];
3577 static unsigned symbol_cost [2];
3578 static unsigned address_cost [2];
3579 tree op0, op1;
3580 comp_cost cost0, cost1, cost;
3581 enum machine_mode mode;
3583 if (!costs_initialized)
3585 tree type = build_pointer_type (integer_type_node);
3586 tree var, addr;
3587 rtx x;
3588 int i;
3590 var = create_tmp_var_raw (integer_type_node, "test_var");
3591 TREE_STATIC (var) = 1;
3592 x = produce_memory_decl_rtl (var, NULL);
3593 SET_DECL_RTL (var, x);
3595 addr = build1 (ADDR_EXPR, type, var);
3598 for (i = 0; i < 2; i++)
3600 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3601 2000), i);
3603 symbol_cost[i] = computation_cost (addr, i) + 1;
3605 address_cost[i]
3606 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3607 if (dump_file && (dump_flags & TDF_DETAILS))
3609 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3610 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3611 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3612 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3613 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3614 fprintf (dump_file, "\n");
3618 costs_initialized = true;
3621 STRIP_NOPS (expr);
3623 if (SSA_VAR_P (expr))
3624 return no_cost;
3626 if (is_gimple_min_invariant (expr))
3628 if (TREE_CODE (expr) == INTEGER_CST)
3629 return new_cost (integer_cost [speed], 0);
3631 if (TREE_CODE (expr) == ADDR_EXPR)
3633 tree obj = TREE_OPERAND (expr, 0);
3635 if (TREE_CODE (obj) == VAR_DECL
3636 || TREE_CODE (obj) == PARM_DECL
3637 || TREE_CODE (obj) == RESULT_DECL)
3638 return new_cost (symbol_cost [speed], 0);
3641 return new_cost (address_cost [speed], 0);
3644 switch (TREE_CODE (expr))
3646 case POINTER_PLUS_EXPR:
3647 case PLUS_EXPR:
3648 case MINUS_EXPR:
3649 case MULT_EXPR:
3650 op0 = TREE_OPERAND (expr, 0);
3651 op1 = TREE_OPERAND (expr, 1);
3652 STRIP_NOPS (op0);
3653 STRIP_NOPS (op1);
3654 break;
3656 CASE_CONVERT:
3657 case NEGATE_EXPR:
3658 op0 = TREE_OPERAND (expr, 0);
3659 STRIP_NOPS (op0);
3660 op1 = NULL_TREE;
3661 break;
3663 default:
3664 /* Just an arbitrary value, FIXME. */
3665 return new_cost (target_spill_cost[speed], 0);
3668 if (op0 == NULL_TREE
3669 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3670 cost0 = no_cost;
3671 else
3672 cost0 = force_expr_to_var_cost (op0, speed);
3674 if (op1 == NULL_TREE
3675 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3676 cost1 = no_cost;
3677 else
3678 cost1 = force_expr_to_var_cost (op1, speed);
3680 mode = TYPE_MODE (TREE_TYPE (expr));
3681 switch (TREE_CODE (expr))
3683 case POINTER_PLUS_EXPR:
3684 case PLUS_EXPR:
3685 case MINUS_EXPR:
3686 case NEGATE_EXPR:
3687 cost = new_cost (add_cost (speed, mode), 0);
3688 if (TREE_CODE (expr) != NEGATE_EXPR)
3690 tree mult = NULL_TREE;
3691 comp_cost sa_cost;
3692 if (TREE_CODE (op1) == MULT_EXPR)
3693 mult = op1;
3694 else if (TREE_CODE (op0) == MULT_EXPR)
3695 mult = op0;
3697 if (mult != NULL_TREE
3698 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3699 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3700 speed, &sa_cost))
3701 return sa_cost;
3703 break;
3705 CASE_CONVERT:
3707 tree inner_mode, outer_mode;
3708 outer_mode = TREE_TYPE (expr);
3709 inner_mode = TREE_TYPE (op0);
3710 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3711 TYPE_MODE (inner_mode), speed), 0);
3713 break;
3715 case MULT_EXPR:
3716 if (cst_and_fits_in_hwi (op0))
3717 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3718 mode, speed), 0);
3719 else if (cst_and_fits_in_hwi (op1))
3720 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3721 mode, speed), 0);
3722 else
3723 return new_cost (target_spill_cost [speed], 0);
3724 break;
3726 default:
3727 gcc_unreachable ();
3730 cost = add_costs (cost, cost0);
3731 cost = add_costs (cost, cost1);
3733 /* Bound the cost by target_spill_cost. The parts of complicated
3734 computations often are either loop invariant or at least can
3735 be shared between several iv uses, so letting this grow without
3736 limits would not give reasonable results. */
3737 if (cost.cost > (int) target_spill_cost [speed])
3738 cost.cost = target_spill_cost [speed];
3740 return cost;
3743 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3744 invariants the computation depends on. */
3746 static comp_cost
3747 force_var_cost (struct ivopts_data *data,
3748 tree expr, bitmap *depends_on)
3750 if (depends_on)
3752 fd_ivopts_data = data;
3753 walk_tree (&expr, find_depends, depends_on, NULL);
3756 return force_expr_to_var_cost (expr, data->speed);
3759 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3760 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3761 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3762 invariants the computation depends on. */
3764 static comp_cost
3765 split_address_cost (struct ivopts_data *data,
3766 tree addr, bool *symbol_present, bool *var_present,
3767 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3769 tree core;
3770 HOST_WIDE_INT bitsize;
3771 HOST_WIDE_INT bitpos;
3772 tree toffset;
3773 enum machine_mode mode;
3774 int unsignedp, volatilep;
3776 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3777 &unsignedp, &volatilep, false);
3779 if (toffset != 0
3780 || bitpos % BITS_PER_UNIT != 0
3781 || TREE_CODE (core) != VAR_DECL)
3783 *symbol_present = false;
3784 *var_present = true;
3785 fd_ivopts_data = data;
3786 walk_tree (&addr, find_depends, depends_on, NULL);
3787 return new_cost (target_spill_cost[data->speed], 0);
3790 *offset += bitpos / BITS_PER_UNIT;
3791 if (TREE_STATIC (core)
3792 || DECL_EXTERNAL (core))
3794 *symbol_present = true;
3795 *var_present = false;
3796 return no_cost;
3799 *symbol_present = false;
3800 *var_present = true;
3801 return no_cost;
3804 /* Estimates cost of expressing difference of addresses E1 - E2 as
3805 var + symbol + offset. The value of offset is added to OFFSET,
3806 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3807 part is missing. DEPENDS_ON is a set of the invariants the computation
3808 depends on. */
3810 static comp_cost
3811 ptr_difference_cost (struct ivopts_data *data,
3812 tree e1, tree e2, bool *symbol_present, bool *var_present,
3813 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3815 HOST_WIDE_INT diff = 0;
3816 aff_tree aff_e1, aff_e2;
3817 tree type;
3819 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3821 if (ptr_difference_const (e1, e2, &diff))
3823 *offset += diff;
3824 *symbol_present = false;
3825 *var_present = false;
3826 return no_cost;
3829 if (integer_zerop (e2))
3830 return split_address_cost (data, TREE_OPERAND (e1, 0),
3831 symbol_present, var_present, offset, depends_on);
3833 *symbol_present = false;
3834 *var_present = true;
3836 type = signed_type_for (TREE_TYPE (e1));
3837 tree_to_aff_combination (e1, type, &aff_e1);
3838 tree_to_aff_combination (e2, type, &aff_e2);
3839 aff_combination_scale (&aff_e2, double_int_minus_one);
3840 aff_combination_add (&aff_e1, &aff_e2);
3842 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3845 /* Estimates cost of expressing difference E1 - E2 as
3846 var + symbol + offset. The value of offset is added to OFFSET,
3847 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3848 part is missing. DEPENDS_ON is a set of the invariants the computation
3849 depends on. */
3851 static comp_cost
3852 difference_cost (struct ivopts_data *data,
3853 tree e1, tree e2, bool *symbol_present, bool *var_present,
3854 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3856 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3857 unsigned HOST_WIDE_INT off1, off2;
3858 aff_tree aff_e1, aff_e2;
3859 tree type;
3861 e1 = strip_offset (e1, &off1);
3862 e2 = strip_offset (e2, &off2);
3863 *offset += off1 - off2;
3865 STRIP_NOPS (e1);
3866 STRIP_NOPS (e2);
3868 if (TREE_CODE (e1) == ADDR_EXPR)
3869 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3870 offset, depends_on);
3871 *symbol_present = false;
3873 if (operand_equal_p (e1, e2, 0))
3875 *var_present = false;
3876 return no_cost;
3879 *var_present = true;
3881 if (integer_zerop (e2))
3882 return force_var_cost (data, e1, depends_on);
3884 if (integer_zerop (e1))
3886 comp_cost cost = force_var_cost (data, e2, depends_on);
3887 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3888 return cost;
3891 type = signed_type_for (TREE_TYPE (e1));
3892 tree_to_aff_combination (e1, type, &aff_e1);
3893 tree_to_aff_combination (e2, type, &aff_e2);
3894 aff_combination_scale (&aff_e2, double_int_minus_one);
3895 aff_combination_add (&aff_e1, &aff_e2);
3897 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3900 /* Returns true if AFF1 and AFF2 are identical. */
3902 static bool
3903 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3905 unsigned i;
3907 if (aff1->n != aff2->n)
3908 return false;
3910 for (i = 0; i < aff1->n; i++)
3912 if (aff1->elts[i].coef != aff2->elts[i].coef)
3913 return false;
3915 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3916 return false;
3918 return true;
3921 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3923 static int
3924 get_expr_id (struct ivopts_data *data, tree expr)
3926 struct iv_inv_expr_ent ent;
3927 struct iv_inv_expr_ent **slot;
3929 ent.expr = expr;
3930 ent.hash = iterative_hash_expr (expr, 0);
3931 slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3932 if (*slot)
3933 return (*slot)->id;
3935 *slot = XNEW (struct iv_inv_expr_ent);
3936 (*slot)->expr = expr;
3937 (*slot)->hash = ent.hash;
3938 (*slot)->id = data->inv_expr_id++;
3939 return (*slot)->id;
3942 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3943 requires a new compiler generated temporary. Returns -1 otherwise.
3944 ADDRESS_P is a flag indicating if the expression is for address
3945 computation. */
3947 static int
3948 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3949 tree cbase, HOST_WIDE_INT ratio,
3950 bool address_p)
3952 aff_tree ubase_aff, cbase_aff;
3953 tree expr, ub, cb;
3955 STRIP_NOPS (ubase);
3956 STRIP_NOPS (cbase);
3957 ub = ubase;
3958 cb = cbase;
3960 if ((TREE_CODE (ubase) == INTEGER_CST)
3961 && (TREE_CODE (cbase) == INTEGER_CST))
3962 return -1;
3964 /* Strips the constant part. */
3965 if (TREE_CODE (ubase) == PLUS_EXPR
3966 || TREE_CODE (ubase) == MINUS_EXPR
3967 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3969 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3970 ubase = TREE_OPERAND (ubase, 0);
3973 /* Strips the constant part. */
3974 if (TREE_CODE (cbase) == PLUS_EXPR
3975 || TREE_CODE (cbase) == MINUS_EXPR
3976 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3978 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3979 cbase = TREE_OPERAND (cbase, 0);
3982 if (address_p)
3984 if (((TREE_CODE (ubase) == SSA_NAME)
3985 || (TREE_CODE (ubase) == ADDR_EXPR
3986 && is_gimple_min_invariant (ubase)))
3987 && (TREE_CODE (cbase) == INTEGER_CST))
3988 return -1;
3990 if (((TREE_CODE (cbase) == SSA_NAME)
3991 || (TREE_CODE (cbase) == ADDR_EXPR
3992 && is_gimple_min_invariant (cbase)))
3993 && (TREE_CODE (ubase) == INTEGER_CST))
3994 return -1;
3997 if (ratio == 1)
3999 if (operand_equal_p (ubase, cbase, 0))
4000 return -1;
4002 if (TREE_CODE (ubase) == ADDR_EXPR
4003 && TREE_CODE (cbase) == ADDR_EXPR)
4005 tree usym, csym;
4007 usym = TREE_OPERAND (ubase, 0);
4008 csym = TREE_OPERAND (cbase, 0);
4009 if (TREE_CODE (usym) == ARRAY_REF)
4011 tree ind = TREE_OPERAND (usym, 1);
4012 if (TREE_CODE (ind) == INTEGER_CST
4013 && tree_fits_shwi_p (ind)
4014 && TREE_INT_CST_LOW (ind) == 0)
4015 usym = TREE_OPERAND (usym, 0);
4017 if (TREE_CODE (csym) == ARRAY_REF)
4019 tree ind = TREE_OPERAND (csym, 1);
4020 if (TREE_CODE (ind) == INTEGER_CST
4021 && tree_fits_shwi_p (ind)
4022 && TREE_INT_CST_LOW (ind) == 0)
4023 csym = TREE_OPERAND (csym, 0);
4025 if (operand_equal_p (usym, csym, 0))
4026 return -1;
4028 /* Now do a more complex comparison.  */
4029 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4030 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4031 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4032 return -1;
4035 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4036 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4038 aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
4039 aff_combination_add (&ubase_aff, &cbase_aff);
4040 expr = aff_combination_to_tree (&ubase_aff);
4041 return get_expr_id (data, expr);
4046 /* Determines the cost of the computation by which USE is expressed
4047 from induction variable CAND. If ADDRESS_P is true, we just need
4048 to create an address from it, otherwise we want to get it into a
4049 register. A set of invariants we depend on is stored in
4050 DEPENDS_ON. AT is the statement at which the value is computed.
4051 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4052 addressing is likely. */
4054 static comp_cost
4055 get_computation_cost_at (struct ivopts_data *data,
4056 struct iv_use *use, struct iv_cand *cand,
4057 bool address_p, bitmap *depends_on, gimple at,
4058 bool *can_autoinc,
4059 int *inv_expr_id)
4061 tree ubase = use->iv->base, ustep = use->iv->step;
4062 tree cbase, cstep;
4063 tree utype = TREE_TYPE (ubase), ctype;
4064 unsigned HOST_WIDE_INT cstepi, offset = 0;
4065 HOST_WIDE_INT ratio, aratio;
4066 bool var_present, symbol_present, stmt_is_after_inc;
4067 comp_cost cost;
4068 double_int rat;
4069 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4070 enum machine_mode mem_mode = (address_p
4071 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4072 : VOIDmode);
4074 *depends_on = NULL;
4076 /* Only consider real candidates. */
4077 if (!cand->iv)
4078 return infinite_cost;
4080 cbase = cand->iv->base;
4081 cstep = cand->iv->step;
4082 ctype = TREE_TYPE (cbase);
4084 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4086 /* We do not have enough precision to express the values of USE. */
4087 return infinite_cost;
4090 if (address_p
4091 || (use->iv->base_object
4092 && cand->iv->base_object
4093 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4094 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4096 /* Do not try to express the address of an object with a computation based
4097 on the address of a different object. This may cause problems in
4098 RTL-level alias analysis (which does not expect this to happen,
4099 as it is illegal in C), and would be unlikely to be useful
4100 anyway. */
4101 if (use->iv->base_object
4102 && cand->iv->base_object
4103 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4104 return infinite_cost;
4107 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4109 /* TODO -- add direct handling of this case. */
4110 goto fallback;
4113 /* CSTEPI is removed from the offset in case the statement is after the
4114 increment. If the step is not constant, we use zero instead.
4115 This is a bit imprecise (there is the extra addition), but
4116 redundancy elimination is likely to transform the code so that
4117 it uses the value of the variable before the increment anyway,
4118 so this is not all that unrealistic. */
4119 if (cst_and_fits_in_hwi (cstep))
4120 cstepi = int_cst_value (cstep);
4121 else
4122 cstepi = 0;
4124 if (!constant_multiple_of (ustep, cstep, &rat))
4125 return infinite_cost;
4127 if (rat.fits_shwi ())
4128 ratio = rat.to_shwi ();
4129 else
4130 return infinite_cost;
4132 STRIP_NOPS (cbase);
4133 ctype = TREE_TYPE (cbase);
4135 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4137 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4138 or ratio == 1, it is better to handle this like
4140 ubase - ratio * cbase + ratio * var
4142 (this also holds in the case ratio == -1, TODO). */
4144 if (cst_and_fits_in_hwi (cbase))
4146 offset = - ratio * int_cst_value (cbase);
4147 cost = difference_cost (data,
4148 ubase, build_int_cst (utype, 0),
4149 &symbol_present, &var_present, &offset,
4150 depends_on);
4151 cost.cost /= avg_loop_niter (data->current_loop);
4153 else if (ratio == 1)
4155 tree real_cbase = cbase;
4157 /* Check to see if any adjustment is needed. */
4158 if (cstepi == 0 && stmt_is_after_inc)
4160 aff_tree real_cbase_aff;
4161 aff_tree cstep_aff;
4163 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4164 &real_cbase_aff);
4165 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4167 aff_combination_add (&real_cbase_aff, &cstep_aff);
4168 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4171 cost = difference_cost (data,
4172 ubase, real_cbase,
4173 &symbol_present, &var_present, &offset,
4174 depends_on);
4175 cost.cost /= avg_loop_niter (data->current_loop);
4177 else if (address_p
4178 && !POINTER_TYPE_P (ctype)
4179 && multiplier_allowed_in_address_p
4180 (ratio, mem_mode,
4181 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4183 cbase
4184 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4185 cost = difference_cost (data,
4186 ubase, cbase,
4187 &symbol_present, &var_present, &offset,
4188 depends_on);
4189 cost.cost /= avg_loop_niter (data->current_loop);
4191 else
4193 cost = force_var_cost (data, cbase, depends_on);
4194 cost = add_costs (cost,
4195 difference_cost (data,
4196 ubase, build_int_cst (utype, 0),
4197 &symbol_present, &var_present,
4198 &offset, depends_on));
4199 cost.cost /= avg_loop_niter (data->current_loop);
4200 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4203 if (inv_expr_id)
4205 *inv_expr_id =
4206 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4207 /* Clear depends on. */
4208 if (*inv_expr_id != -1 && depends_on && *depends_on)
4209 bitmap_clear (*depends_on);
4212 /* If we are after the increment, the value of the candidate is higher by
4213 one iteration. */
4214 if (stmt_is_after_inc)
4215 offset -= ratio * cstepi;
4217 /* Now the computation is in the shape symbol + var1 + const + ratio * var2.
4218 (symbol/var1/const parts may be omitted). If we are looking for an
4219 address, find the cost of addressing this. */
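/* For instance (hypothetical use): an access a[i + 3] with 4-byte
   elements, based on a candidate var2 counting iterations, could take the
   shape &a + 12 + 4 * var2, i.e. symbol == &a, const == 12, ratio == 4
   and no var1 part.  */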
4220 if (address_p)
4221 return add_costs (cost,
4222 get_address_cost (symbol_present, var_present,
4223 offset, ratio, cstepi,
4224 mem_mode,
4225 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4226 speed, stmt_is_after_inc,
4227 can_autoinc));
4229 /* Otherwise estimate the costs for computing the expression. */
4230 if (!symbol_present && !var_present && !offset)
4232 if (ratio != 1)
4233 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4234 return cost;
4237 /* Symbol + offset should be compile-time computable, so consider that they
4238 are added once to the variable, if present. */
4239 if (var_present && (symbol_present || offset))
4240 cost.cost += adjust_setup_cost (data,
4241 add_cost (speed, TYPE_MODE (ctype)));
4243 /* Having an offset does not affect the runtime cost if it is added to the
4244 symbol, but it increases complexity. */
4245 if (offset)
4246 cost.complexity++;
4248 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4250 aratio = ratio > 0 ? ratio : -ratio;
4251 if (aratio != 1)
4252 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4253 return cost;
4255 fallback:
4256 if (can_autoinc)
4257 *can_autoinc = false;
4260 /* Just get the expression, expand it and measure the cost. */
4261 tree comp = get_computation_at (data->current_loop, use, cand, at);
4263 if (!comp)
4264 return infinite_cost;
4266 if (address_p)
4267 comp = build_simple_mem_ref (comp);
4269 return new_cost (computation_cost (comp, speed), 0);
4273 /* Determines the cost of the computation by which USE is expressed
4274 from induction variable CAND. If ADDRESS_P is true, we just need
4275 to create an address from it, otherwise we want to get it into a
4276 register. A set of invariants we depend on is stored in
4277 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4278 autoinc addressing is likely. */
4280 static comp_cost
4281 get_computation_cost (struct ivopts_data *data,
4282 struct iv_use *use, struct iv_cand *cand,
4283 bool address_p, bitmap *depends_on,
4284 bool *can_autoinc, int *inv_expr_id)
4286 return get_computation_cost_at (data,
4287 use, cand, address_p, depends_on, use->stmt,
4288 can_autoinc, inv_expr_id);
4291 /* Determines cost of basing replacement of USE on CAND in a generic
4292 expression. */
4294 static bool
4295 determine_use_iv_cost_generic (struct ivopts_data *data,
4296 struct iv_use *use, struct iv_cand *cand)
4298 bitmap depends_on;
4299 comp_cost cost;
4300 int inv_expr_id = -1;
4302 /* The simple case first -- if we need to express the value of the preserved
4303 original biv, the cost is 0. This also prevents us from counting the
4304 cost of increment twice -- once at this use and once in the cost of
4305 the candidate. */
4306 if (cand->pos == IP_ORIGINAL
4307 && cand->incremented_at == use->stmt)
4309 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4310 ERROR_MARK, -1);
4311 return true;
4314 cost = get_computation_cost (data, use, cand, false, &depends_on,
4315 NULL, &inv_expr_id);
4317 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4318 inv_expr_id);
4320 return !infinite_cost_p (cost);
4323 /* Determines cost of basing replacement of USE on CAND in an address. */
4325 static bool
4326 determine_use_iv_cost_address (struct ivopts_data *data,
4327 struct iv_use *use, struct iv_cand *cand)
4329 bitmap depends_on;
4330 bool can_autoinc;
4331 int inv_expr_id = -1;
4332 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4333 &can_autoinc, &inv_expr_id);
4335 if (cand->ainc_use == use)
4337 if (can_autoinc)
4338 cost.cost -= cand->cost_step;
4339 /* If we generated the candidate solely for exploiting autoincrement
4340 opportunities, and it turns out it can't be used, set the cost to
4341 infinity to make sure we ignore it. */
4342 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4343 cost = infinite_cost;
4345 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4346 inv_expr_id);
4348 return !infinite_cost_p (cost);
4351 /* Computes value of candidate CAND at position AT in iteration NITER, and
4352 stores it to VAL. */
4354 static void
4355 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4356 aff_tree *val)
4358 aff_tree step, delta, nit;
4359 struct iv *iv = cand->iv;
4360 tree type = TREE_TYPE (iv->base);
4361 tree steptype = type;
4362 if (POINTER_TYPE_P (type))
4363 steptype = sizetype;
4365 tree_to_aff_combination (iv->step, steptype, &step);
4366 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4367 aff_combination_convert (&nit, steptype);
4368 aff_combination_mult (&nit, &step, &delta);
4369 if (stmt_after_increment (loop, cand, at))
4370 aff_combination_add (&delta, &step);
4372 tree_to_aff_combination (iv->base, type, val);
4373 aff_combination_add (val, &delta);
4376 /* Returns period of induction variable iv. */
4378 static tree
4379 iv_period (struct iv *iv)
4381 tree step = iv->step, period, type;
4382 tree pow2div;
4384 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4386 type = unsigned_type_for (TREE_TYPE (step));
4387 /* Period of the iv is lcm (step, type_range)/step - 1,
4388 i.e., N*type_range/step - 1. Since the type range is a power
4389 of two, N == step >> num_of_ending_zeros_binary (step),
4390 so the final result is
4392 (type_range >> num_of_ending_zeros_binary (step)) - 1
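/* Worked example: for a 32-bit unsigned type and step == 12 (binary 1100,
   two trailing zeros), the period is (2^32 >> 2) - 1 == 0x3fffffff.  */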
4395 pow2div = num_ending_zeros (step);
4397 period = build_low_bits_mask (type,
4398 (TYPE_PRECISION (type)
4399 - tree_to_uhwi (pow2div)));
4401 return period;
4404 /* Returns the comparison operator used when eliminating the iv USE. */
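/* E.g., when the exit is taken on the true edge of the condition, the
   eliminated test becomes "cand == bound" (leave the loop on equality);
   when it is taken on the false edge, it becomes "cand != bound" (stay
   in the loop while unequal).  */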
4406 static enum tree_code
4407 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4409 struct loop *loop = data->current_loop;
4410 basic_block ex_bb;
4411 edge exit;
4413 ex_bb = gimple_bb (use->stmt);
4414 exit = EDGE_SUCC (ex_bb, 0);
4415 if (flow_bb_inside_loop_p (loop, exit->dest))
4416 exit = EDGE_SUCC (ex_bb, 1);
4418 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
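/* Strips conversions from EXP that change neither its value nor whether
   its type wraps on overflow.  */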
4421 static tree
4422 strip_wrap_conserving_type_conversions (tree exp)
4424 while (tree_ssa_useless_type_conversion (exp)
4425 && (nowrap_type_p (TREE_TYPE (exp))
4426 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4427 exp = TREE_OPERAND (exp, 0);
4428 return exp;
4431 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we
4432 check for an exact match. */
4434 static bool
4435 expr_equal_p (tree e, tree what)
4437 gimple stmt;
4438 enum tree_code code;
4440 e = strip_wrap_conserving_type_conversions (e);
4441 what = strip_wrap_conserving_type_conversions (what);
4443 code = TREE_CODE (what);
4444 if (TREE_TYPE (e) != TREE_TYPE (what))
4445 return false;
4447 if (operand_equal_p (e, what, 0))
4448 return true;
4450 if (TREE_CODE (e) != SSA_NAME)
4451 return false;
4453 stmt = SSA_NAME_DEF_STMT (e);
4454 if (gimple_code (stmt) != GIMPLE_ASSIGN
4455 || gimple_assign_rhs_code (stmt) != code)
4456 return false;
4458 switch (get_gimple_rhs_class (code))
4460 case GIMPLE_BINARY_RHS:
4461 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4462 return false;
4463 /* Fallthru. */
4465 case GIMPLE_UNARY_RHS:
4466 case GIMPLE_SINGLE_RHS:
4467 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4468 default:
4469 return false;
4473 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4474 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4475 calculation is performed in a non-wrapping type.
4477 TODO: More generally, we could test for the situation that
4478 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4479 This would require knowing the sign of OFFSET.
4481 Also, we only look for the first addition in the computation of BASE.
4482 More complex analysis would be better, but introducing it just for
4483 this optimization seems like overkill. */
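/* E.g., if BASE was computed as SOMETHING + OFFSET in a type that does
   not wrap, then BASE - OFFSET is exactly SOMETHING, so the subtraction
   cannot overflow either.  */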
4485 static bool
4486 difference_cannot_overflow_p (tree base, tree offset)
4488 enum tree_code code;
4489 tree e1, e2;
4491 if (!nowrap_type_p (TREE_TYPE (base)))
4492 return false;
4494 base = expand_simple_operations (base);
4496 if (TREE_CODE (base) == SSA_NAME)
4498 gimple stmt = SSA_NAME_DEF_STMT (base);
4500 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4501 return false;
4503 code = gimple_assign_rhs_code (stmt);
4504 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4505 return false;
4507 e1 = gimple_assign_rhs1 (stmt);
4508 e2 = gimple_assign_rhs2 (stmt);
4510 else
4512 code = TREE_CODE (base);
4513 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4514 return false;
4515 e1 = TREE_OPERAND (base, 0);
4516 e2 = TREE_OPERAND (base, 1);
4519 /* TODO: deeper inspection may be necessary to prove the equality. */
4520 switch (code)
4522 case PLUS_EXPR:
4523 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4524 case POINTER_PLUS_EXPR:
4525 return expr_equal_p (e2, offset);
4527 default:
4528 return false;
4532 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4533 comparison with CAND. NITER describes the number of iterations of
4534 the loop. If successful, the comparison in COMP_P is altered accordingly.
4536 We aim to handle the following situation:
4538 sometype *base, *p;
4539 int a, b, i;
4541 i = a;
4542 p = p_0 = base + a;
4544 do
4545 {
4546 bla (*p);
4547 p++;
4548 i++;
4549 }
4550 while (i < b);
4552 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4553 We aim to optimize this to
4555 p = p_0 = base + a;
4556 do
4557 {
4558 bla (*p);
4559 p++;
4560 }
4561 while (p < p_0 - a + b);
4563 This preserves correctness, since the pointer arithmetic does not
4564 overflow. More precisely:
4566 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4567 overflow in computing it or the values of p.
4568 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4569 overflow. To prove this, we use the fact that p_0 = base + a. */
4571 static bool
4572 iv_elimination_compare_lt (struct ivopts_data *data,
4573 struct iv_cand *cand, enum tree_code *comp_p,
4574 struct tree_niter_desc *niter)
4576 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4577 struct affine_tree_combination nit, tmpa, tmpb;
4578 enum tree_code comp;
4579 HOST_WIDE_INT step;
4581 /* We need to know that the candidate induction variable does not overflow.
4582 While more complex analysis may be used to prove this, for now just
4583 check that the variable appears in the original program and that it
4584 is computed in a type that guarantees no overflows. */
4585 cand_type = TREE_TYPE (cand->iv->base);
4586 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4587 return false;
4589 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4590 the calculation of the BOUND could overflow, making the comparison
4591 invalid. */
4592 if (!data->loop_single_exit_p)
4593 return false;
4595 /* We need to be able to decide whether the candidate is increasing or decreasing
4596 in order to choose the right comparison operator. */
4597 if (!cst_and_fits_in_hwi (cand->iv->step))
4598 return false;
4599 step = int_cst_value (cand->iv->step);
4601 /* Check that the number of iterations matches the expected pattern:
4602 a + 1 > b ? 0 : b - a - 1. */
4603 mbz = niter->may_be_zero;
4604 if (TREE_CODE (mbz) == GT_EXPR)
4606 /* Handle a + 1 > b. */
4607 tree op0 = TREE_OPERAND (mbz, 0);
4608 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4610 a = TREE_OPERAND (op0, 0);
4611 b = TREE_OPERAND (mbz, 1);
4613 else
4614 return false;
4616 else if (TREE_CODE (mbz) == LT_EXPR)
4618 tree op1 = TREE_OPERAND (mbz, 1);
4620 /* Handle b < a + 1. */
4621 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4623 a = TREE_OPERAND (op1, 0);
4624 b = TREE_OPERAND (mbz, 0);
4626 else
4627 return false;
4629 else
4630 return false;
4632 /* Expected number of iterations is B - A - 1. Check that it matches
4633 the actual number, i.e., that B - A - NITER = 1. */
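/* The check below works on affine combinations: TMPA and NIT are scaled
   by -1 and added to TMPB, so TMPB ends up holding B - A - NITER, which
   must reduce to the constant 1 with no variable parts left.  */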
4634 tree_to_aff_combination (niter->niter, nit_type, &nit);
4635 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4636 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4637 aff_combination_scale (&nit, double_int_minus_one);
4638 aff_combination_scale (&tmpa, double_int_minus_one);
4639 aff_combination_add (&tmpb, &tmpa);
4640 aff_combination_add (&tmpb, &nit);
4641 if (tmpb.n != 0 || tmpb.offset != double_int_one)
4642 return false;
4644 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4645 overflow. */
4646 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4647 cand->iv->step,
4648 fold_convert (TREE_TYPE (cand->iv->step), a));
4649 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4650 return false;
4652 /* Determine the new comparison operator. */
4653 comp = step < 0 ? GT_EXPR : LT_EXPR;
4654 if (*comp_p == NE_EXPR)
4655 *comp_p = comp;
4656 else if (*comp_p == EQ_EXPR)
4657 *comp_p = invert_tree_comparison (comp, false);
4658 else
4659 gcc_unreachable ();
4661 return true;
4664 /* Check whether it is possible to express the condition in USE by comparison
4665 of candidate CAND. If so, store the value it is compared with to BOUND, and the
4666 comparison operator to COMP. */
4668 static bool
4669 may_eliminate_iv (struct ivopts_data *data,
4670 struct iv_use *use, struct iv_cand *cand, tree *bound,
4671 enum tree_code *comp)
4673 basic_block ex_bb;
4674 edge exit;
4675 tree period;
4676 struct loop *loop = data->current_loop;
4677 aff_tree bnd;
4678 struct tree_niter_desc *desc = NULL;
4680 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4681 return false;
4683 /* For now this works only for exits that dominate the loop latch.
4684 TODO: extend to other conditions inside loop body. */
4685 ex_bb = gimple_bb (use->stmt);
4686 if (use->stmt != last_stmt (ex_bb)
4687 || gimple_code (use->stmt) != GIMPLE_COND
4688 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4689 return false;
4691 exit = EDGE_SUCC (ex_bb, 0);
4692 if (flow_bb_inside_loop_p (loop, exit->dest))
4693 exit = EDGE_SUCC (ex_bb, 1);
4694 if (flow_bb_inside_loop_p (loop, exit->dest))
4695 return false;
4697 desc = niter_for_exit (data, exit);
4698 if (!desc)
4699 return false;
4701 /* Determine whether we can use the variable to test the exit condition.
4702 This is the case iff the period of the induction variable is greater
4703 than the number of iterations for which the exit condition is true. */
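/* E.g. (illustrative): an 8-bit unsigned candidate with step 2 has
   period 127, so an exit test "cand != bound" is only usable when the
   exit is reached within 127 iterations -- afterwards the candidate
   wraps and the test may miss the intended value.  */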
4704 period = iv_period (cand->iv);
4706 /* If the number of iterations is constant, compare against it directly. */
4707 if (TREE_CODE (desc->niter) == INTEGER_CST)
4709 /* See cand_value_at. */
4710 if (stmt_after_increment (loop, cand, use->stmt))
4712 if (!tree_int_cst_lt (desc->niter, period))
4713 return false;
4715 else
4717 if (tree_int_cst_lt (period, desc->niter))
4718 return false;
4722 /* If not, and if this is the only possible exit of the loop, see whether
4723 we can get a conservative estimate on the number of iterations of the
4724 entire loop and compare against that instead. */
4725 else
4727 double_int period_value, max_niter;
4729 max_niter = desc->max;
4730 if (stmt_after_increment (loop, cand, use->stmt))
4731 max_niter += double_int_one;
4732 period_value = tree_to_double_int (period);
4733 if (max_niter.ugt (period_value))
4735 /* See if we can take advantage of inferred loop bound information. */
4736 if (data->loop_single_exit_p)
4738 if (!max_loop_iterations (loop, &max_niter))
4739 return false;
4740 /* The loop bound is already adjusted by adding 1. */
4741 if (max_niter.ugt (period_value))
4742 return false;
4744 else
4745 return false;
4749 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4751 *bound = aff_combination_to_tree (&bnd);
4752 *comp = iv_elimination_compare (data, use);
4754 /* It is unlikely that computing the number of iterations using division
4755 would be more profitable than keeping the original induction variable. */
4756 if (expression_expensive_p (*bound))
4757 return false;
4759 /* Sometimes it is possible to handle the situation where the number of
4760 iterations may be zero unless additional assumptions hold, by using <
4761 instead of != in the exit condition.
4763 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4764 base the exit condition on it. However, that is often too
4765 expensive. */
4766 if (!integer_zerop (desc->may_be_zero))
4767 return iv_elimination_compare_lt (data, cand, comp, desc);
4769 return true;
4772 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4773 be copied if it is used in the loop body and DATA->body_includes_call. */
4775 static int
4776 parm_decl_cost (struct ivopts_data *data, tree bound)
4778 tree sbound = bound;
4779 STRIP_NOPS (sbound);
4781 if (TREE_CODE (sbound) == SSA_NAME
4782 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4783 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4784 && data->body_includes_call)
4785 return COSTS_N_INSNS (1);
4787 return 0;
4790 /* Determines cost of basing replacement of USE on CAND in a condition. */
4792 static bool
4793 determine_use_iv_cost_condition (struct ivopts_data *data,
4794 struct iv_use *use, struct iv_cand *cand)
4796 tree bound = NULL_TREE;
4797 struct iv *cmp_iv;
4798 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4799 comp_cost elim_cost, express_cost, cost, bound_cost;
4800 bool ok;
4801 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4802 tree *control_var, *bound_cst;
4803 enum tree_code comp = ERROR_MARK;
4805 /* Only consider real candidates. */
4806 if (!cand->iv)
4808 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4809 ERROR_MARK, -1);
4810 return false;
4813 /* Try iv elimination. */
4814 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4816 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4817 if (elim_cost.cost == 0)
4818 elim_cost.cost = parm_decl_cost (data, bound);
4819 else if (TREE_CODE (bound) == INTEGER_CST)
4820 elim_cost.cost = 0;
4821 /* If we replace a loop condition 'i < n' with 'p < base + n',
4822 depends_on_elim will have 'base' and 'n' set, which implies
4823 that both 'base' and 'n' will be live during the loop. More likely,
4824 'base + n' will be loop invariant, resulting in only one live value
4825 during the loop. So in that case we clear depends_on_elim and set
4826 elim_inv_expr_id instead. */
4827 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4829 elim_inv_expr_id = get_expr_id (data, bound);
4830 bitmap_clear (depends_on_elim);
4832 /* The bound is a loop invariant, so it will only be computed
4833 once. */
4834 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4836 else
4837 elim_cost = infinite_cost;
4839 /* Try expressing the original giv. If it is compared with an invariant,
4840 note that we cannot get rid of it. */
4841 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4842 NULL, &cmp_iv);
4843 gcc_assert (ok);
4845 /* When the condition is a comparison of the candidate IV against
4846 zero, prefer this IV.
4848 TODO: The constant that we're subtracting from the cost should
4849 be target-dependent. This information should be added to the
4850 target costs for each backend. */
4851 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4852 && integer_zerop (*bound_cst)
4853 && (operand_equal_p (*control_var, cand->var_after, 0)
4854 || operand_equal_p (*control_var, cand->var_before, 0)))
4855 elim_cost.cost -= 1;
4857 express_cost = get_computation_cost (data, use, cand, false,
4858 &depends_on_express, NULL,
4859 &express_inv_expr_id);
4860 fd_ivopts_data = data;
4861 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4863 /* Count the cost of the original bound as well. */
4864 bound_cost = force_var_cost (data, *bound_cst, NULL);
4865 if (bound_cost.cost == 0)
4866 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4867 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4868 bound_cost.cost = 0;
4869 express_cost.cost += bound_cost.cost;
4871 /* Choose the better approach, preferring the eliminated IV. */
4872 if (compare_costs (elim_cost, express_cost) <= 0)
4874 cost = elim_cost;
4875 depends_on = depends_on_elim;
4876 depends_on_elim = NULL;
4877 inv_expr_id = elim_inv_expr_id;
4879 else
4881 cost = express_cost;
4882 depends_on = depends_on_express;
4883 depends_on_express = NULL;
4884 bound = NULL_TREE;
4885 comp = ERROR_MARK;
4886 inv_expr_id = express_inv_expr_id;
4889 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4891 if (depends_on_elim)
4892 BITMAP_FREE (depends_on_elim);
4893 if (depends_on_express)
4894 BITMAP_FREE (depends_on_express);
4896 return !infinite_cost_p (cost);
4899 /* Determines cost of basing replacement of USE on CAND. Returns false
4900 if USE cannot be based on CAND. */
4902 static bool
4903 determine_use_iv_cost (struct ivopts_data *data,
4904 struct iv_use *use, struct iv_cand *cand)
4906 switch (use->type)
4908 case USE_NONLINEAR_EXPR:
4909 return determine_use_iv_cost_generic (data, use, cand);
4911 case USE_ADDRESS:
4912 return determine_use_iv_cost_address (data, use, cand);
4914 case USE_COMPARE:
4915 return determine_use_iv_cost_condition (data, use, cand);
4917 default:
4918 gcc_unreachable ();
4922 /* Return true if get_computation_cost indicates that autoincrement is
4923 a possibility for the pair of USE and CAND, false otherwise. */
4925 static bool
4926 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4927 struct iv_cand *cand)
4929 bitmap depends_on;
4930 bool can_autoinc;
4931 comp_cost cost;
4933 if (use->type != USE_ADDRESS)
4934 return false;
4936 cost = get_computation_cost (data, use, cand, true, &depends_on,
4937 &can_autoinc, NULL);
4939 BITMAP_FREE (depends_on);
4941 return !infinite_cost_p (cost) && can_autoinc;
4944 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4945 use that allows autoincrement, and set their AINC_USE if possible. */
4947 static void
4948 set_autoinc_for_original_candidates (struct ivopts_data *data)
4950 unsigned i, j;
4952 for (i = 0; i < n_iv_cands (data); i++)
4954 struct iv_cand *cand = iv_cand (data, i);
4955 struct iv_use *closest_before = NULL;
4956 struct iv_use *closest_after = NULL;
4957 if (cand->pos != IP_ORIGINAL)
4958 continue;
4960 for (j = 0; j < n_iv_uses (data); j++)
4962 struct iv_use *use = iv_use (data, j);
4963 unsigned uid = gimple_uid (use->stmt);
4965 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4966 continue;
4968 if (uid < gimple_uid (cand->incremented_at)
4969 && (closest_before == NULL
4970 || uid > gimple_uid (closest_before->stmt)))
4971 closest_before = use;
4973 if (uid > gimple_uid (cand->incremented_at)
4974 && (closest_after == NULL
4975 || uid < gimple_uid (closest_after->stmt)))
4976 closest_after = use;
4979 if (closest_before != NULL
4980 && autoinc_possible_for_pair (data, closest_before, cand))
4981 cand->ainc_use = closest_before;
4982 else if (closest_after != NULL
4983 && autoinc_possible_for_pair (data, closest_after, cand))
4984 cand->ainc_use = closest_after;
4988 /* Finds the candidates for the induction variables. */
4990 static void
4991 find_iv_candidates (struct ivopts_data *data)
4993 /* Add commonly used ivs. */
4994 add_standard_iv_candidates (data);
4996 /* Add old induction variables. */
4997 add_old_ivs_candidates (data);
4999 /* Add induction variables derived from uses. */
5000 add_derived_ivs_candidates (data);
5002 set_autoinc_for_original_candidates (data);
5004 /* Record the important candidates. */
5005 record_important_candidates (data);
5008 /* Determines costs of basing the use of the iv on an iv candidate. */
5010 static void
5011 determine_use_iv_costs (struct ivopts_data *data)
5013 unsigned i, j;
5014 struct iv_use *use;
5015 struct iv_cand *cand;
5016 bitmap to_clear = BITMAP_ALLOC (NULL);
5018 alloc_use_cost_map (data);
5020 for (i = 0; i < n_iv_uses (data); i++)
5022 use = iv_use (data, i);
5024 if (data->consider_all_candidates)
5026 for (j = 0; j < n_iv_cands (data); j++)
5028 cand = iv_cand (data, j);
5029 determine_use_iv_cost (data, use, cand);
5032 else
5034 bitmap_iterator bi;
5036 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5038 cand = iv_cand (data, j);
5039 if (!determine_use_iv_cost (data, use, cand))
5040 bitmap_set_bit (to_clear, j);
5043 /* Remove the candidates for which the cost is infinite from
5044 the list of related candidates. */
5045 bitmap_and_compl_into (use->related_cands, to_clear);
5046 bitmap_clear (to_clear);
5050 BITMAP_FREE (to_clear);
5052 if (dump_file && (dump_flags & TDF_DETAILS))
5054 fprintf (dump_file, "Use-candidate costs:\n");
5056 for (i = 0; i < n_iv_uses (data); i++)
5058 use = iv_use (data, i);
5060 fprintf (dump_file, "Use %d:\n", i);
5061 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5062 for (j = 0; j < use->n_map_members; j++)
5064 if (!use->cost_map[j].cand
5065 || infinite_cost_p (use->cost_map[j].cost))
5066 continue;
5068 fprintf (dump_file, " %d\t%d\t%d\t",
5069 use->cost_map[j].cand->id,
5070 use->cost_map[j].cost.cost,
5071 use->cost_map[j].cost.complexity);
5072 if (use->cost_map[j].depends_on)
5073 bitmap_print (dump_file,
5074 use->cost_map[j].depends_on, "","");
5075 if (use->cost_map[j].inv_expr_id != -1)
5076 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5077 fprintf (dump_file, "\n");
5080 fprintf (dump_file, "\n");
5082 fprintf (dump_file, "\n");
5086 /* Determines cost of the candidate CAND. */
5088 static void
5089 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5091 comp_cost cost_base;
5092 unsigned cost, cost_step;
5093 tree base;
5095 if (!cand->iv)
5097 cand->cost = 0;
5098 return;
5101 /* There are two costs associated with the candidate -- its increment
5102 and its initialization. The second is almost negligible for any loop
5103 that rolls enough, so we give it only very little weight. */
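/* Concretely, adjust_setup_cost below divides the setup cost by the
   average number of loop iterations (when optimizing for speed), so for
   a loop that rolls many times the initialization contributes little.  */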
5105 base = cand->iv->base;
5106 cost_base = force_var_cost (data, base, NULL);
5107 /* It is exceptional for the iv register to happen to be initialized with
5108 the proper value at no cost. In general, there will be at least a regcopy
5109 or a const set. */
5110 if (cost_base.cost == 0)
5111 cost_base.cost = COSTS_N_INSNS (1);
5112 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5114 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5116 /* Prefer the original ivs unless we may gain something by replacing them.
5117 The reason is to make debugging simpler; this is not relevant for
5118 artificial ivs created by other optimization passes. */
5119 if (cand->pos != IP_ORIGINAL
5120 || !SSA_NAME_VAR (cand->var_before)
5121 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5122 cost++;
5124 /* Prefer not to insert statements into the latch unless there are some
5125 already (so that we do not create unnecessary jumps). */
5126 if (cand->pos == IP_END
5127 && empty_block_p (ip_end_pos (data->current_loop)))
5128 cost++;
5130 cand->cost = cost;
5131 cand->cost_step = cost_step;
5134 /* Determines costs of computation of the candidates. */
5136 static void
5137 determine_iv_costs (struct ivopts_data *data)
5139 unsigned i;
5141 if (dump_file && (dump_flags & TDF_DETAILS))
5143 fprintf (dump_file, "Candidate costs:\n");
5144 fprintf (dump_file, " cand\tcost\n");
5147 for (i = 0; i < n_iv_cands (data); i++)
5149 struct iv_cand *cand = iv_cand (data, i);
5151 determine_iv_cost (data, cand);
5153 if (dump_file && (dump_flags & TDF_DETAILS))
5154 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5157 if (dump_file && (dump_flags & TDF_DETAILS))
5158 fprintf (dump_file, "\n");
5161 /* Calculates cost for having SIZE induction variables. */
5163 static unsigned
5164 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5166 /* We add size to the cost, so that we prefer eliminating ivs
5167 if possible. */
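/* estimate_reg_pressure_cost estimates the spill and copy cost of
   keeping SIZE extra pseudos live across the loop, given the
   DATA->regs_used registers the loop body already consumes.  */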
5168 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5169 data->body_includes_call);
5172 /* For each size of the induction variable set determine the penalty. */
5174 static void
5175 determine_set_costs (struct ivopts_data *data)
5177 unsigned j, n;
5178 gimple phi;
5179 gimple_stmt_iterator psi;
5180 tree op;
5181 struct loop *loop = data->current_loop;
5182 bitmap_iterator bi;
5184 if (dump_file && (dump_flags & TDF_DETAILS))
5186 fprintf (dump_file, "Global costs:\n");
5187 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5188 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5189 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5190 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5193 n = 0;
5194 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5196 phi = gsi_stmt (psi);
5197 op = PHI_RESULT (phi);
5199 if (virtual_operand_p (op))
5200 continue;
5202 if (get_iv (data, op))
5203 continue;
5205 n++;
5208 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5210 struct version_info *info = ver_info (data, j);
5212 if (info->inv_id && info->has_nonlin_use)
5213 n++;
5216 data->regs_used = n;
5217 if (dump_file && (dump_flags & TDF_DETAILS))
5218 fprintf (dump_file, " regs_used %d\n", n);
5220 if (dump_file && (dump_flags & TDF_DETAILS))
5222 fprintf (dump_file, " cost for size:\n");
5223 fprintf (dump_file, " ivs\tcost\n");
5224 for (j = 0; j <= 2 * target_avail_regs; j++)
5225 fprintf (dump_file, " %d\t%d\n", j,
5226 ivopts_global_cost_for_size (data, j));
5227 fprintf (dump_file, "\n");
5231 /* Returns true if A is a cheaper cost pair than B. */
5233 static bool
5234 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5236 int cmp;
5238 if (!a)
5239 return false;
5241 if (!b)
5242 return true;
5244 cmp = compare_costs (a->cost, b->cost);
5245 if (cmp < 0)
5246 return true;
5248 if (cmp > 0)
5249 return false;
5251 /* In case the costs are the same, prefer the cheaper candidate. */
5252 if (a->cand->cost < b->cand->cost)
5253 return true;
5255 return false;
5259 /* Returns the candidate by which USE is expressed in IVS. */
5261 static struct cost_pair *
5262 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5264 return ivs->cand_for_use[use->id];
5267 /* Computes the cost field of IVS structure. */
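/* The total is the sum of the per-use costs and the per-candidate costs,
   plus the global register-pressure estimate for the registers consumed
   by the candidates and by the used invariant expressions.  */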
5269 static void
5270 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5272 comp_cost cost = ivs->cand_use_cost;
5274 cost.cost += ivs->cand_cost;
5276 cost.cost += ivopts_global_cost_for_size (data,
5277 ivs->n_regs + ivs->num_used_inv_expr);
5279 ivs->cost = cost;
5282 /* Remove the invariants in set INVS from set IVS. */
5284 static void
5285 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5287 bitmap_iterator bi;
5288 unsigned iid;
5290 if (!invs)
5291 return;
5293 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5295 ivs->n_invariant_uses[iid]--;
5296 if (ivs->n_invariant_uses[iid] == 0)
5297 ivs->n_regs--;
5301 /* Set USE not to be expressed by any candidate in IVS. */
5303 static void
5304 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5305 struct iv_use *use)
5307 unsigned uid = use->id, cid;
5308 struct cost_pair *cp;
5310 cp = ivs->cand_for_use[uid];
5311 if (!cp)
5312 return;
5313 cid = cp->cand->id;
5315 ivs->bad_uses++;
5316 ivs->cand_for_use[uid] = NULL;
5317 ivs->n_cand_uses[cid]--;
5319 if (ivs->n_cand_uses[cid] == 0)
5321 bitmap_clear_bit (ivs->cands, cid);
5322 /* Do not count the pseudocandidates. */
5323 if (cp->cand->iv)
5324 ivs->n_regs--;
5325 ivs->n_cands--;
5326 ivs->cand_cost -= cp->cand->cost;
5328 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5331 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5333 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5335 if (cp->inv_expr_id != -1)
5337 ivs->used_inv_expr[cp->inv_expr_id]--;
5338 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5339 ivs->num_used_inv_expr--;
5341 iv_ca_recount_cost (data, ivs);
5344 /* Add invariants in set INVS to set IVS. */
5346 static void
5347 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5349 bitmap_iterator bi;
5350 unsigned iid;
5352 if (!invs)
5353 return;
5355 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5357 ivs->n_invariant_uses[iid]++;
5358 if (ivs->n_invariant_uses[iid] == 1)
5359 ivs->n_regs++;
5363 /* Set cost pair for USE in set IVS to CP. */
5365 static void
5366 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5367 struct iv_use *use, struct cost_pair *cp)
5369 unsigned uid = use->id, cid;
5371 if (ivs->cand_for_use[uid] == cp)
5372 return;
5374 if (ivs->cand_for_use[uid])
5375 iv_ca_set_no_cp (data, ivs, use);
5377 if (cp)
5379 cid = cp->cand->id;
5381 ivs->bad_uses--;
5382 ivs->cand_for_use[uid] = cp;
5383 ivs->n_cand_uses[cid]++;
5384 if (ivs->n_cand_uses[cid] == 1)
5386 bitmap_set_bit (ivs->cands, cid);
5387 /* Do not count the pseudocandidates. */
5388 if (cp->cand->iv)
5389 ivs->n_regs++;
5390 ivs->n_cands++;
5391 ivs->cand_cost += cp->cand->cost;
5393 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5396 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5397 iv_ca_set_add_invariants (ivs, cp->depends_on);
5399 if (cp->inv_expr_id != -1)
5401 ivs->used_inv_expr[cp->inv_expr_id]++;
5402 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5403 ivs->num_used_inv_expr++;
5405 iv_ca_recount_cost (data, ivs);
5409 /* Extend set IVS by expressing USE by some of the candidates in it
5410 if possible. All important candidates will be considered
5411 if IMPORTANT_CANDIDATES is true. */
5413 static void
5414 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5415 struct iv_use *use, bool important_candidates)
5417 struct cost_pair *best_cp = NULL, *cp;
5418 bitmap_iterator bi;
5419 bitmap cands;
5420 unsigned i;
5422 gcc_assert (ivs->upto >= use->id);
5424 if (ivs->upto == use->id)
5426 ivs->upto++;
5427 ivs->bad_uses++;
5430 cands = (important_candidates ? data->important_candidates : ivs->cands);
5431 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5433 struct iv_cand *cand = iv_cand (data, i);
5435 cp = get_use_iv_cost (data, use, cand);
5437 if (cheaper_cost_pair (cp, best_cp))
5438 best_cp = cp;
5441 iv_ca_set_cp (data, ivs, use, best_cp);
5444 /* Get cost for assignment IVS. */
5446 static comp_cost
5447 iv_ca_cost (struct iv_ca *ivs)
5449 /* This was a conditional expression but it triggered a bug in
5450 Sun C 5.5. */
5451 if (ivs->bad_uses)
5452 return infinite_cost;
5453 else
5454 return ivs->cost;
5457 /* Returns true if all dependences of CP are among invariants in IVS. */
5459 static bool
5460 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5462 unsigned i;
5463 bitmap_iterator bi;
5465 if (!cp->depends_on)
5466 return true;
5468 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5470 if (ivs->n_invariant_uses[i] == 0)
5471 return false;
5474 return true;
5477 /* Creates a change expressing USE by NEW_CP instead of OLD_CP, and chains
5478 it before NEXT_CHANGE. */
5480 static struct iv_ca_delta *
5481 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5482 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5484 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5486 change->use = use;
5487 change->old_cp = old_cp;
5488 change->new_cp = new_cp;
5489 change->next_change = next_change;
5491 return change;
5494 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5495 are rewritten. */
5497 static struct iv_ca_delta *
5498 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5500 struct iv_ca_delta *last;
5502 if (!l2)
5503 return l1;
5505 if (!l1)
5506 return l2;
5508 for (last = l1; last->next_change; last = last->next_change)
5509 continue;
5510 last->next_change = l2;
5512 return l1;
5515 /* Reverse the list of changes DELTA, forming its inverse. */
5517 static struct iv_ca_delta *
5518 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5520 struct iv_ca_delta *act, *next, *prev = NULL;
5521 struct cost_pair *tmp;
5523 for (act = delta; act; act = next)
5525 next = act->next_change;
5526 act->next_change = prev;
5527 prev = act;
5529 tmp = act->old_cp;
5530 act->old_cp = act->new_cp;
5531 act->new_cp = tmp;
5534 return prev;
5537 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5538 reverted instead. */
5540 static void
5541 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5542 struct iv_ca_delta *delta, bool forward)
5544 struct cost_pair *from, *to;
5545 struct iv_ca_delta *act;
5547 if (!forward)
5548 delta = iv_ca_delta_reverse (delta);
5550 for (act = delta; act; act = act->next_change)
5552 from = act->old_cp;
5553 to = act->new_cp;
5554 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5555 iv_ca_set_cp (data, ivs, act->use, to);
5558 if (!forward)
5559 iv_ca_delta_reverse (delta);
5562 /* Returns true if CAND is used in IVS. */
5564 static bool
5565 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5567 return ivs->n_cand_uses[cand->id] > 0;
5570 /* Returns number of induction variable candidates in the set IVS. */
5572 static unsigned
5573 iv_ca_n_cands (struct iv_ca *ivs)
5575 return ivs->n_cands;
5578 /* Free the list of changes DELTA. */
5580 static void
5581 iv_ca_delta_free (struct iv_ca_delta **delta)
5583 struct iv_ca_delta *act, *next;
5585 for (act = *delta; act; act = next)
5587 next = act->next_change;
5588 free (act);
5591 *delta = NULL;
5594 /* Allocates a new iv candidate assignment. */
5596 static struct iv_ca *
5597 iv_ca_new (struct ivopts_data *data)
5599 struct iv_ca *nw = XNEW (struct iv_ca);
5601 nw->upto = 0;
5602 nw->bad_uses = 0;
5603 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5604 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5605 nw->cands = BITMAP_ALLOC (NULL);
5606 nw->n_cands = 0;
5607 nw->n_regs = 0;
5608 nw->cand_use_cost = no_cost;
5609 nw->cand_cost = 0;
5610 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5611 nw->cost = no_cost;
5612 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5613 nw->num_used_inv_expr = 0;
5615 return nw;
5618 /* Free memory occupied by the set IVS. */
5620 static void
5621 iv_ca_free (struct iv_ca **ivs)
5623 free ((*ivs)->cand_for_use);
5624 free ((*ivs)->n_cand_uses);
5625 BITMAP_FREE ((*ivs)->cands);
5626 free ((*ivs)->n_invariant_uses);
5627 free ((*ivs)->used_inv_expr);
5628 free (*ivs);
5629 *ivs = NULL;
5632 /* Dumps IVS to FILE. */
5634 static void
5635 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5637 const char *pref = " invariants ";
5638 unsigned i;
5639 comp_cost cost = iv_ca_cost (ivs);
5641 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5642 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5643 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5644 bitmap_print (file, ivs->cands, " candidates: ","\n");
5646 for (i = 0; i < ivs->upto; i++)
5648 struct iv_use *use = iv_use (data, i);
5649 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5650 if (cp)
5651 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5652 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5653 else
5654 fprintf (file, " use:%d --> ??\n", use->id);
5657 for (i = 1; i <= data->max_inv_id; i++)
5658 if (ivs->n_invariant_uses[i])
5660 fprintf (file, "%s%d", pref, i);
5661 pref = ", ";
5663 fprintf (file, "\n\n");
5666 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5667 new set, and store differences in DELTA. Number of induction variables
5668 in the new set is stored in N_IVS. MIN_NCAND is a flag. When it is true,
5669 the function will try to find a solution with a minimal number of iv candidates. */
5671 static comp_cost
5672 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5673 struct iv_cand *cand, struct iv_ca_delta **delta,
5674 unsigned *n_ivs, bool min_ncand)
5676 unsigned i;
5677 comp_cost cost;
5678 struct iv_use *use;
5679 struct cost_pair *old_cp, *new_cp;
5681 *delta = NULL;
5682 for (i = 0; i < ivs->upto; i++)
5684 use = iv_use (data, i);
5685 old_cp = iv_ca_cand_for_use (ivs, use);
5687 if (old_cp
5688 && old_cp->cand == cand)
5689 continue;
5691 new_cp = get_use_iv_cost (data, use, cand);
5692 if (!new_cp)
5693 continue;
5695 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5696 continue;
5698 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5699 continue;
5701 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5704 iv_ca_delta_commit (data, ivs, *delta, true);
5705 cost = iv_ca_cost (ivs);
5706 if (n_ivs)
5707 *n_ivs = iv_ca_n_cands (ivs);
5708 iv_ca_delta_commit (data, ivs, *delta, false);
5710 return cost;
5713 /* Try narrowing set IVS by removing CAND. Return the cost of
5714 the new set and store the differences in DELTA. */
5716 static comp_cost
5717 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5718 struct iv_cand *cand, struct iv_ca_delta **delta)
5720 unsigned i, ci;
5721 struct iv_use *use;
5722 struct cost_pair *old_cp, *new_cp, *cp;
5723 bitmap_iterator bi;
5724 struct iv_cand *cnd;
5725 comp_cost cost;
5727 *delta = NULL;
5728 for (i = 0; i < n_iv_uses (data); i++)
5730 use = iv_use (data, i);
5732 old_cp = iv_ca_cand_for_use (ivs, use);
5733 if (old_cp->cand != cand)
5734 continue;
5736 new_cp = NULL;
5738 if (data->consider_all_candidates)
5740 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5742 if (ci == cand->id)
5743 continue;
5745 cnd = iv_cand (data, ci);
5747 cp = get_use_iv_cost (data, use, cnd);
5748 if (!cp)
5749 continue;
5751 if (!iv_ca_has_deps (ivs, cp))
5752 continue;
5754 if (!cheaper_cost_pair (cp, new_cp))
5755 continue;
5757 new_cp = cp;
5760 else
5762 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5764 if (ci == cand->id)
5765 continue;
5767 cnd = iv_cand (data, ci);
5769 cp = get_use_iv_cost (data, use, cnd);
5770 if (!cp)
5771 continue;
5772 if (!iv_ca_has_deps (ivs, cp))
5773 continue;
5775 if (!cheaper_cost_pair (cp, new_cp))
5776 continue;
5778 new_cp = cp;
5782 if (!new_cp)
5784 iv_ca_delta_free (delta);
5785 return infinite_cost;
5788 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5791 iv_ca_delta_commit (data, ivs, *delta, true);
5792 cost = iv_ca_cost (ivs);
5793 iv_ca_delta_commit (data, ivs, *delta, false);
5795 return cost;
5798 /* Try optimizing the set of candidates IVS by removing candidates different
5799 from EXCEPT_CAND from it. Return the cost of the new set, and store
5800 differences in DELTA. */
5802 static comp_cost
5803 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5804 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5806 bitmap_iterator bi;
5807 struct iv_ca_delta *act_delta, *best_delta;
5808 unsigned i;
5809 comp_cost best_cost, acost;
5810 struct iv_cand *cand;
5812 best_delta = NULL;
5813 best_cost = iv_ca_cost (ivs);
5815 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5817 cand = iv_cand (data, i);
5819 if (cand == except_cand)
5820 continue;
5822 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5824 if (compare_costs (acost, best_cost) < 0)
5826 best_cost = acost;
5827 iv_ca_delta_free (&best_delta);
5828 best_delta = act_delta;
5830 else
5831 iv_ca_delta_free (&act_delta);
5834 if (!best_delta)
5836 *delta = NULL;
5837 return best_cost;
5840 /* Recurse to possibly remove other unnecessary ivs. */
5841 iv_ca_delta_commit (data, ivs, best_delta, true);
5842 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5843 iv_ca_delta_commit (data, ivs, best_delta, false);
5844 *delta = iv_ca_delta_join (best_delta, *delta);
5845 return best_cost;
5848 /* Tries to extend the set IVS in the best possible way in order
5849 to express the USE. If ORIGINALP is true, prefer candidates from
5850 the original set of IVs, otherwise favor important candidates not
5851 based on any memory object. */
5853 static bool
5854 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5855 struct iv_use *use, bool originalp)
5857 comp_cost best_cost, act_cost;
5858 unsigned i;
5859 bitmap_iterator bi;
5860 struct iv_cand *cand;
5861 struct iv_ca_delta *best_delta = NULL, *act_delta;
5862 struct cost_pair *cp;
5864 iv_ca_add_use (data, ivs, use, false);
5865 best_cost = iv_ca_cost (ivs);
5867 cp = iv_ca_cand_for_use (ivs, use);
5868 if (!cp)
5870 ivs->upto--;
5871 ivs->bad_uses--;
5872 iv_ca_add_use (data, ivs, use, true);
5873 best_cost = iv_ca_cost (ivs);
5874 cp = iv_ca_cand_for_use (ivs, use);
5876 if (cp)
5878 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5879 iv_ca_set_no_cp (data, ivs, use);
5882 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5883 first try important candidates not based on any memory object. Only if
5884 this fails, try the specific ones. Rationale -- in loops with many
5885 variables the best choice often is to use just one generic biv. If we
5886 added here many ivs specific to the uses, the optimization algorithm later
5887 would be likely to get stuck in a local minimum, thus causing us to create
5888 too many ivs. The approach from few ivs to more seems more likely to be
5889 successful -- starting from few ivs, replacing an expensive use by a
5890 specific iv should always be a win. */
5891 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5893 cand = iv_cand (data, i);
5895 if (originalp && cand->pos != IP_ORIGINAL)
5896 continue;
5898 if (!originalp && cand->iv->base_object != NULL_TREE)
5899 continue;
5901 if (iv_ca_cand_used_p (ivs, cand))
5902 continue;
5904 cp = get_use_iv_cost (data, use, cand);
5905 if (!cp)
5906 continue;
5908 iv_ca_set_cp (data, ivs, use, cp);
5909 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5910 true);
5911 iv_ca_set_no_cp (data, ivs, use);
5912 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5914 if (compare_costs (act_cost, best_cost) < 0)
5916 best_cost = act_cost;
5918 iv_ca_delta_free (&best_delta);
5919 best_delta = act_delta;
5921 else
5922 iv_ca_delta_free (&act_delta);
5925 if (infinite_cost_p (best_cost))
5927 for (i = 0; i < use->n_map_members; i++)
5929 cp = use->cost_map + i;
5930 cand = cp->cand;
5931 if (!cand)
5932 continue;
5934 /* Already tried this. */
5935 if (cand->important)
5937 if (originalp && cand->pos == IP_ORIGINAL)
5938 continue;
5939 if (!originalp && cand->iv->base_object == NULL_TREE)
5940 continue;
5943 if (iv_ca_cand_used_p (ivs, cand))
5944 continue;
5946 act_delta = NULL;
5947 iv_ca_set_cp (data, ivs, use, cp);
5948 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5949 iv_ca_set_no_cp (data, ivs, use);
5950 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5951 cp, act_delta);
5953 if (compare_costs (act_cost, best_cost) < 0)
5955 best_cost = act_cost;
5957 if (best_delta)
5958 iv_ca_delta_free (&best_delta);
5959 best_delta = act_delta;
5961 else
5962 iv_ca_delta_free (&act_delta);
5966 iv_ca_delta_commit (data, ivs, best_delta, true);
5967 iv_ca_delta_free (&best_delta);
5969 return !infinite_cost_p (best_cost);
5972 /* Finds an initial assignment of candidates to uses. */
5974 static struct iv_ca *
5975 get_initial_solution (struct ivopts_data *data, bool originalp)
5977 struct iv_ca *ivs = iv_ca_new (data);
5978 unsigned i;
5980 for (i = 0; i < n_iv_uses (data); i++)
5981 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5983 iv_ca_free (&ivs);
5984 return NULL;
5987 return ivs;
5990 /* Tries to improve set of induction variables IVS. */
5992 static bool
5993 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5995 unsigned i, n_ivs;
5996 comp_cost acost, best_cost = iv_ca_cost (ivs);
5997 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5998 struct iv_cand *cand;
6000 /* Try extending the set of induction variables by one. */
6001 for (i = 0; i < n_iv_cands (data); i++)
6003 cand = iv_cand (data, i);
6005 if (iv_ca_cand_used_p (ivs, cand))
6006 continue;
6008 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6009 if (!act_delta)
6010 continue;
6012 /* If we successfully added the candidate and the set is small enough,
6013 try optimizing it by removing other candidates. */
6014 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6016 iv_ca_delta_commit (data, ivs, act_delta, true);
6017 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6018 iv_ca_delta_commit (data, ivs, act_delta, false);
6019 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6022 if (compare_costs (acost, best_cost) < 0)
6024 best_cost = acost;
6025 iv_ca_delta_free (&best_delta);
6026 best_delta = act_delta;
6028 else
6029 iv_ca_delta_free (&act_delta);
6032 if (!best_delta)
6034 /* Try removing the candidates from the set instead. */
6035 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6037 /* Nothing more we can do. */
6038 if (!best_delta)
6039 return false;
6042 iv_ca_delta_commit (data, ivs, best_delta, true);
6043 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6044 iv_ca_delta_free (&best_delta);
6045 return true;
6048 /* Attempts to find the optimal set of induction variables. We use a simple
6049 greedy heuristic: we try to replace at most one candidate in the selected
6050 solution and remove the unused ivs, as long as this improves the cost. */
6052 static struct iv_ca *
6053 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6055 struct iv_ca *set;
6057 /* Get the initial solution. */
6058 set = get_initial_solution (data, originalp);
6059 if (!set)
6061 if (dump_file && (dump_flags & TDF_DETAILS))
6062 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6063 return NULL;
6066 if (dump_file && (dump_flags & TDF_DETAILS))
6068 fprintf (dump_file, "Initial set of candidates:\n");
6069 iv_ca_dump (data, dump_file, set);
6072 while (try_improve_iv_set (data, set))
6074 if (dump_file && (dump_flags & TDF_DETAILS))
6076 fprintf (dump_file, "Improved to:\n");
6077 iv_ca_dump (data, dump_file, set);
6081 return set;
6084 static struct iv_ca *
6085 find_optimal_iv_set (struct ivopts_data *data)
6087 unsigned i;
6088 struct iv_ca *set, *origset;
6089 struct iv_use *use;
6090 comp_cost cost, origcost;
6092 /* Determine the cost based on a strategy that starts with original IVs,
6093 and try again using a strategy that prefers candidates not based
6094 on any IVs. */
6095 origset = find_optimal_iv_set_1 (data, true);
6096 set = find_optimal_iv_set_1 (data, false);
6098 if (!origset && !set)
6099 return NULL;
6101 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6102 cost = set ? iv_ca_cost (set) : infinite_cost;
6104 if (dump_file && (dump_flags & TDF_DETAILS))
6106 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6107 origcost.cost, origcost.complexity);
6108 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6109 cost.cost, cost.complexity);
6112 /* Choose the one with the best cost. */
6113 if (compare_costs (origcost, cost) <= 0)
6115 if (set)
6116 iv_ca_free (&set);
6117 set = origset;
6119 else if (origset)
6120 iv_ca_free (&origset);
6122 for (i = 0; i < n_iv_uses (data); i++)
6124 use = iv_use (data, i);
6125 use->selected = iv_ca_cand_for_use (set, use)->cand;
6128 return set;
6131 /* Creates a new induction variable corresponding to CAND. */
6133 static void
6134 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6136 gimple_stmt_iterator incr_pos;
6137 tree base;
6138 bool after = false;
6140 if (!cand->iv)
6141 return;
6143 switch (cand->pos)
6145 case IP_NORMAL:
6146 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6147 break;
6149 case IP_END:
6150 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6151 after = true;
6152 break;
6154 case IP_AFTER_USE:
6155 after = true;
6156 /* fall through */
6157 case IP_BEFORE_USE:
6158 incr_pos = gsi_for_stmt (cand->incremented_at);
6159 break;
6161 case IP_ORIGINAL:
6162 /* Mark that the iv is preserved. */
6163 name_info (data, cand->var_before)->preserve_biv = true;
6164 name_info (data, cand->var_after)->preserve_biv = true;
6166 /* Rewrite the increment so that it uses var_before directly. */
6167 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6168 return;
6171 gimple_add_tmp_var (cand->var_before);
6173 base = unshare_expr (cand->iv->base);
6175 create_iv (base, unshare_expr (cand->iv->step),
6176 cand->var_before, data->current_loop,
6177 &incr_pos, after, &cand->var_before, &cand->var_after);
6180 /* Creates new induction variables described in SET. */
6182 static void
6183 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6185 unsigned i;
6186 struct iv_cand *cand;
6187 bitmap_iterator bi;
6189 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6191 cand = iv_cand (data, i);
6192 create_new_iv (data, cand);
6195 if (dump_file && (dump_flags & TDF_DETAILS))
6197 fprintf (dump_file, "\nSelected IV set: \n");
6198 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6200 cand = iv_cand (data, i);
6201 dump_cand (dump_file, cand);
6203 fprintf (dump_file, "\n");
6207 /* Rewrites USE (definition of iv used in a nonlinear expression)
6208 using candidate CAND. */
6210 static void
6211 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6212 struct iv_use *use, struct iv_cand *cand)
6214 tree comp;
6215 tree op, tgt;
6216 gimple ass;
6217 gimple_stmt_iterator bsi;
6219 /* An important special case -- if we are asked to express the value of
6220 the original iv by itself, just exit; there is no need to
6221 introduce a new computation (that might also need casting the
6222 variable to unsigned and back). */
6223 if (cand->pos == IP_ORIGINAL
6224 && cand->incremented_at == use->stmt)
6226 enum tree_code stmt_code;
6228 gcc_assert (is_gimple_assign (use->stmt));
6229 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6231 /* Check whether we may leave the computation unchanged.
6232 This is the case only if it does not rely on other
6233 computations in the loop -- otherwise, the computation
6234 we rely upon may be removed in remove_unused_ivs,
6235 thus leading to an ICE. */
6236 stmt_code = gimple_assign_rhs_code (use->stmt);
6237 if (stmt_code == PLUS_EXPR
6238 || stmt_code == MINUS_EXPR
6239 || stmt_code == POINTER_PLUS_EXPR)
6241 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6242 op = gimple_assign_rhs2 (use->stmt);
6243 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6244 op = gimple_assign_rhs1 (use->stmt);
6245 else
6246 op = NULL_TREE;
6248 else
6249 op = NULL_TREE;
6251 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6252 return;
6255 comp = get_computation (data->current_loop, use, cand);
6256 gcc_assert (comp != NULL_TREE);
6258 switch (gimple_code (use->stmt))
6260 case GIMPLE_PHI:
6261 tgt = PHI_RESULT (use->stmt);
6263 /* If we should keep the biv, do not replace it. */
6264 if (name_info (data, tgt)->preserve_biv)
6265 return;
6267 bsi = gsi_after_labels (gimple_bb (use->stmt));
6268 break;
6270 case GIMPLE_ASSIGN:
6271 tgt = gimple_assign_lhs (use->stmt);
6272 bsi = gsi_for_stmt (use->stmt);
6273 break;
6275 default:
6276 gcc_unreachable ();
6279 if (!valid_gimple_rhs_p (comp)
6280 || (gimple_code (use->stmt) != GIMPLE_PHI
6281 /* We can't allow re-allocating the stmt as it might still
6282 be pointed to. */
6283 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6284 >= gimple_num_ops (gsi_stmt (bsi)))))
6286 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6287 true, GSI_SAME_STMT);
6288 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6290 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6291 /* As this isn't a plain copy we have to reset alignment
6292 information. */
6293 if (SSA_NAME_PTR_INFO (comp))
6294 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6298 if (gimple_code (use->stmt) == GIMPLE_PHI)
6300 ass = gimple_build_assign (tgt, comp);
6301 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6303 bsi = gsi_for_stmt (use->stmt);
6304 remove_phi_node (&bsi, false);
6306 else
6308 gimple_assign_set_rhs_from_tree (&bsi, comp);
6309 use->stmt = gsi_stmt (bsi);
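
/* An invented example of the rewrite above: a nonlinear use

     a_8 = i_4 * 4;

   expressed through an unsigned candidate ivtmp_6 that already advances
   by 4 each iteration might become

     a_8 = (int) ivtmp_6;

   where the right-hand side is whatever get_computation produced, forced
   into valid gimple form if necessary.  */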

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;

   Directly propagating t over to (1) would introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms it
   into:

   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;

   Note how the offset drops from 16 to 8: iv2 = iv1 + 1, so indexing by
   iv2 with stride 8 overshoots the original address by one stride.  */

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  /* The iv update must immediately precede the exit condition.  */
  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  /* And USE must in turn immediately precede the iv update.  */
  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
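
/* Note that after the move CAND is recorded as being incremented right
   before USE (IP_BEFORE_USE), so later address computations for USE are
   based on var_after, and the constant offset shrinks by one step, as in
   the example above.  */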

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
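
/* An invented example: an address use *p_7 whose affine form works out
   to &a + 4 * ivtmp_3 may be rewritten by create_mem_ref into a
   TARGET_MEM_REF along the lines of

     MEM[symbol: a, index: ivtmp_3, step: 4]

   choosing a shape that is valid for the target's addressing modes.  */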

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
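
/* An invented example of the replacement above: an exit test

     if (i_23 < 100)

   may, for a candidate counting the remaining iterations, become

     if (ivtmp_17 != 0)

   with the bound forced into the loop preheader first when it is not
   already a simple operand.  */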

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;

	      /* Pick the selected candidate whose iv most resembles the
		 dead one: a matching step counts 4, a matching mode of
		 the base 2, a constant base 1.  Only the first 64 uses
		 are scanned, to bound the amount of work done here.  */
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
			      ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		       == TYPE_MODE (TREE_TYPE (info->iv->base))
		       ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		       ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
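
/* An invented example of the debug handling above: if the removed iv i
   had several debug uses, the code emits a debug temp such as

     # DEBUG D#1 => (int) ivtmp_5 - 1
     # DEBUG i => D#1

   so that var-tracking can still describe i's value in terms of the
   surviving candidate.  */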

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
		      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab.empty ();
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  DATA is
   the pass data to be freed.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  data->inv_expr_tab.dispose ();
}

/* Returns true if the loop body BODY (an array of NUM_NODES basic blocks)
   includes any function calls, ignoring inexpensive builtins.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}
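
/* The flag computed from this predicate feeds the register-pressure
   estimate (it is passed down to estimate_reg_pressure_cost): when the
   body contains a call, call-clobbered registers are unavailable across
   it, which makes large iv sets costlier.  */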

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}