Concretize gimple_cond_set_code
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
/* Induction variable optimizations.
   Copyright (C) 2003-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variables value to
         be usable for the given purpose (adding base and offset for arrays,
         etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
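/* As a simplified illustration (not taken from this pass's actual output),
   the strength reduction performed here can turn an indexed store

     for (i = 0; i < n; i++)
       a[i] = 0;

   into a form that advances a pointer induction variable instead,

     for (p = &a[0]; p < &a[n]; p++)
       *p = 0;

   replacing the per-iteration address computation base + i * size by a
   single cheap pointer increment.  */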
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"
#include "builtins.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs wrt the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
         names that appear in phi nodes on abnormal edges, so that we do not
         create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
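/* For instance (an illustrative sketch, not an exhaustive description): for
   EXPR = &a[i] + 4, the POINTER_PLUS_EXPR and ADDR_EXPR cases combine so
   that the function returns the address of the array A converted to
   ptr_type_node, identifying A as the memory object the iv walks over.  */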
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
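/* For example (illustrative only): &a[0] is "complex" in this sense, since
   the ADDR_EXPR wraps an ARRAY_REF rather than a bare declaration, whereas
   &a is not.  alloc_iv below relies on this to lower such bases, so that
   &a[0] and &a do not yield duplicate candidates.  */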
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple_phi phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple_phi phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_phi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple_phi phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_phi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
          && gimple_code (def) == GIMPLE_PHI
          && gimple_bb (def) == loop->header)
        continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */
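/* For example (illustrative only): for TOP = 4 * i + 8 * i and BOT = i,
   each addend is a constant multiple of BOT, so 12 is stored to *MUL;
   for TOP = i + 1 and BOT = i the function returns false.  */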
static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
        return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
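/* The last check above guards against steps that can break alignment over
   iterations: for instance (illustrative only), a reference requiring 32-bit
   alignment accessed with a step of 2 bytes has tree_ctz (step) == 1, and
   (1U << 1) * BITS_PER_UNIT == 16 < 32, so the access may become misaligned
   in a later iteration even if the first access is aligned.  */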
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt,
                               tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple_phi phi;
  gimple_phi_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id,
                       info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
2067 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2068 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2069 we are at the top-level of the processed address. */
2071 static tree
2072 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2073 HOST_WIDE_INT *offset)
2075 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2076 enum tree_code code;
2077 tree type, orig_type = TREE_TYPE (expr);
2078 HOST_WIDE_INT off0, off1, st;
2079 tree orig_expr = expr;
2081 STRIP_NOPS (expr);
2083 type = TREE_TYPE (expr);
2084 code = TREE_CODE (expr);
2085 *offset = 0;
2087 switch (code)
2089 case INTEGER_CST:
2090 if (!cst_and_fits_in_hwi (expr)
2091 || integer_zerop (expr))
2092 return orig_expr;
2094 *offset = int_cst_value (expr);
2095 return build_int_cst (orig_type, 0);
2097 case POINTER_PLUS_EXPR:
2098 case PLUS_EXPR:
2099 case MINUS_EXPR:
2100 op0 = TREE_OPERAND (expr, 0);
2101 op1 = TREE_OPERAND (expr, 1);
2103 op0 = strip_offset_1 (op0, false, false, &off0);
2104 op1 = strip_offset_1 (op1, false, false, &off1);
2106 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2107 if (op0 == TREE_OPERAND (expr, 0)
2108 && op1 == TREE_OPERAND (expr, 1))
2109 return orig_expr;
2111 if (integer_zerop (op1))
2112 expr = op0;
2113 else if (integer_zerop (op0))
2115 if (code == MINUS_EXPR)
2116 expr = fold_build1 (NEGATE_EXPR, type, op1);
2117 else
2118 expr = op1;
2120 else
2121 expr = fold_build2 (code, type, op0, op1);
2123 return fold_convert (orig_type, expr);
2125 case MULT_EXPR:
2126 op1 = TREE_OPERAND (expr, 1);
2127 if (!cst_and_fits_in_hwi (op1))
2128 return orig_expr;
2130 op0 = TREE_OPERAND (expr, 0);
2131 op0 = strip_offset_1 (op0, false, false, &off0);
2132 if (op0 == TREE_OPERAND (expr, 0))
2133 return orig_expr;
2135 *offset = off0 * int_cst_value (op1);
2136 if (integer_zerop (op0))
2137 expr = op0;
2138 else
2139 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2141 return fold_convert (orig_type, expr);
2143 case ARRAY_REF:
2144 case ARRAY_RANGE_REF:
2145 if (!inside_addr)
2146 return orig_expr;
2148 step = array_ref_element_size (expr);
2149 if (!cst_and_fits_in_hwi (step))
2150 break;
2152 st = int_cst_value (step);
2153 op1 = TREE_OPERAND (expr, 1);
2154 op1 = strip_offset_1 (op1, false, false, &off1);
2155 *offset = off1 * st;
2157 if (top_compref
2158 && integer_zerop (op1))
2160 /* Strip the component reference completely. */
2161 op0 = TREE_OPERAND (expr, 0);
2162 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2163 *offset += off0;
2164 return op0;
2166 break;
2168 case COMPONENT_REF:
2170 tree field;
2172 if (!inside_addr)
2173 return orig_expr;
2175 tmp = component_ref_field_offset (expr);
2176 field = TREE_OPERAND (expr, 1);
2177 if (top_compref
2178 && cst_and_fits_in_hwi (tmp)
2179 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2181 HOST_WIDE_INT boffset, abs_off;
2183 /* Strip the component reference completely. */
2184 op0 = TREE_OPERAND (expr, 0);
2185 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2186 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2187 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2188 if (boffset < 0)
2189 abs_off = -abs_off;
2191 *offset = off0 + int_cst_value (tmp) + abs_off;
2192 return op0;
2195 break;
2197 case ADDR_EXPR:
2198 op0 = TREE_OPERAND (expr, 0);
2199 op0 = strip_offset_1 (op0, true, true, &off0);
2200 *offset += off0;
2202 if (op0 == TREE_OPERAND (expr, 0))
2203 return orig_expr;
2205 expr = build_fold_addr_expr (op0);
2206 return fold_convert (orig_type, expr);
2208 case MEM_REF:
2209 /* ??? Offset operand? */
2210 inside_addr = false;
2211 break;
2213 default:
2214 return orig_expr;
2217 /* Default handling of expressions for which we want to recurse into
2218 the first operand. */
2219 op0 = TREE_OPERAND (expr, 0);
2220 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2221 *offset += off0;
2223 if (op0 == TREE_OPERAND (expr, 0)
2224 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2225 return orig_expr;
2227 expr = copy_node (expr);
2228 TREE_OPERAND (expr, 0) = op0;
2229 if (op1)
2230 TREE_OPERAND (expr, 1) = op1;
2232 /* Inside an address, we might strip the top-level component references,
2233 thus changing the type of the expression. The handling of ADDR_EXPR
2234 will fix that. */
2235 expr = fold_convert (orig_type, expr);
2237 return expr;
2240 /* Strips constant offsets from EXPR and stores them in *OFFSET. */
2242 static tree
2243 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2245 HOST_WIDE_INT off;
2246 tree core = strip_offset_1 (expr, false, false, &off);
2247 *offset = off;
2248 return core;
2251 /* Returns a variant of TYPE that can be used as a base for different uses.
2252 We return an unsigned type with the same precision, which avoids problems
2253 with overflows. */
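/* E.g. int maps to unsigned int, and a pointer type maps to an unsigned
   integer type of the same precision; arithmetic on the new ivs then wraps
   modulo 2^precision instead of invoking undefined overflow behavior.  */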
2255 static tree
2256 generic_type_for (tree type)
2258 if (POINTER_TYPE_P (type))
2259 return unsigned_type_for (type);
2261 if (TYPE_UNSIGNED (type))
2262 return type;
2264 return unsigned_type_for (type);
2267 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2268 the bitmap in which we should store them. */
2270 static struct ivopts_data *fd_ivopts_data;
2271 static tree
2272 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2274 bitmap *depends_on = (bitmap *) data;
2275 struct version_info *info;
2277 if (TREE_CODE (*expr_p) != SSA_NAME)
2278 return NULL_TREE;
2279 info = name_info (fd_ivopts_data, *expr_p);
2281 if (!info->inv_id || info->has_nonlin_use)
2282 return NULL_TREE;
2284 if (!*depends_on)
2285 *depends_on = BITMAP_ALLOC (NULL);
2286 bitmap_set_bit (*depends_on, info->inv_id);
2288 return NULL_TREE;
2291 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2292 and the position to POS. If USE is not NULL, the candidate is set as
2293 related to it. If both BASE and STEP are NULL, we add a pseudocandidate
2294 for the replacement of the final value of the iv by a direct computation. */
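/* Candidates are deduplicated here: if an equivalent candidate (same
   position, increment statement, base, step and precision) already exists,
   it is reused and only its importance and related-use bookkeeping are
   updated.  */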
2296 static struct iv_cand *
2297 add_candidate_1 (struct ivopts_data *data,
2298 tree base, tree step, bool important, enum iv_position pos,
2299 struct iv_use *use, gimple incremented_at)
2301 unsigned i;
2302 struct iv_cand *cand = NULL;
2303 tree type, orig_type;
2305 /* For non-original variables, make sure their values are computed in a type
2306 that does not invoke undefined behavior on overflows (since in general,
2307 we cannot prove that these induction variables are non-wrapping). */
2308 if (pos != IP_ORIGINAL)
2310 orig_type = TREE_TYPE (base);
2311 type = generic_type_for (orig_type);
2312 if (type != orig_type)
2314 base = fold_convert (type, base);
2315 step = fold_convert (type, step);
2319 for (i = 0; i < n_iv_cands (data); i++)
2321 cand = iv_cand (data, i);
2323 if (cand->pos != pos)
2324 continue;
2326 if (cand->incremented_at != incremented_at
2327 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2328 && cand->ainc_use != use))
2329 continue;
2331 if (!cand->iv)
2333 if (!base && !step)
2334 break;
2336 continue;
2339 if (!base && !step)
2340 continue;
2342 if (operand_equal_p (base, cand->iv->base, 0)
2343 && operand_equal_p (step, cand->iv->step, 0)
2344 && (TYPE_PRECISION (TREE_TYPE (base))
2345 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2346 break;
2349 if (i == n_iv_cands (data))
2351 cand = XCNEW (struct iv_cand);
2352 cand->id = i;
2354 if (!base && !step)
2355 cand->iv = NULL;
2356 else
2357 cand->iv = alloc_iv (base, step);
2359 cand->pos = pos;
2360 if (pos != IP_ORIGINAL && cand->iv)
2362 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2363 cand->var_after = cand->var_before;
2365 cand->important = important;
2366 cand->incremented_at = incremented_at;
2367 data->iv_candidates.safe_push (cand);
2369 if (step
2370 && TREE_CODE (step) != INTEGER_CST)
2372 fd_ivopts_data = data;
2373 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2376 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2377 cand->ainc_use = use;
2378 else
2379 cand->ainc_use = NULL;
2381 if (dump_file && (dump_flags & TDF_DETAILS))
2382 dump_cand (dump_file, cand);
2385 if (important && !cand->important)
2387 cand->important = true;
2388 if (dump_file && (dump_flags & TDF_DETAILS))
2389 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2392 if (use)
2394 bitmap_set_bit (use->related_cands, i);
2395 if (dump_file && (dump_flags & TDF_DETAILS))
2396 fprintf (dump_file, "Candidate %d is related to use %d\n",
2397 cand->id, use->id);
2400 return cand;
2403 /* Returns true if incrementing the induction variable at the end of the LOOP
2404 is allowed.
2406 The purpose is to avoid splitting the latch edge with a biv increment, thus
2407 creating a jump, possibly confusing other optimization passes and leaving
2408 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2409 is not available (so we do not have a better alternative), or if the latch
2410 edge is already nonempty. */
2412 static bool
2413 allow_ip_end_pos_p (struct loop *loop)
2415 if (!ip_normal_pos (loop))
2416 return true;
2418 if (!empty_block_p (ip_end_pos (loop)))
2419 return true;
2421 return false;
2424 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2425 The important field is set to IMPORTANT. */
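/* E.g. on a target with post-increment addressing, a memory access whose
   step equals the access size (a "*p++" pattern) can be done by a single
   auto-increment instruction, so candidates whose increment is placed
   directly before or after the use are worth adding.  */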
2427 static void
2428 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2429 bool important, struct iv_use *use)
2431 basic_block use_bb = gimple_bb (use->stmt);
2432 enum machine_mode mem_mode;
2433 unsigned HOST_WIDE_INT cstepi;
2435 /* If we insert the increment in any position other than the standard
2436 ones, we must ensure that it is incremented once per iteration.
2437 It must not be in an inner nested loop, or one side of an if
2438 statement. */
2439 if (use_bb->loop_father != data->current_loop
2440 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2441 || stmt_could_throw_p (use->stmt)
2442 || !cst_and_fits_in_hwi (step))
2443 return;
2445 cstepi = int_cst_value (step);
2447 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2448 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2449 || USE_STORE_PRE_INCREMENT (mem_mode))
2450 && GET_MODE_SIZE (mem_mode) == cstepi)
2451 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2452 || USE_STORE_PRE_DECREMENT (mem_mode))
2453 && GET_MODE_SIZE (mem_mode) == -cstepi))
2455 enum tree_code code = MINUS_EXPR;
2456 tree new_base;
2457 tree new_step = step;
2459 if (POINTER_TYPE_P (TREE_TYPE (base)))
2461 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2462 code = POINTER_PLUS_EXPR;
2464 else
2465 new_step = fold_convert (TREE_TYPE (base), new_step);
2466 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2467 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2468 use->stmt);
2470 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2471 || USE_STORE_POST_INCREMENT (mem_mode))
2472 && GET_MODE_SIZE (mem_mode) == cstepi)
2473 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2474 || USE_STORE_POST_DECREMENT (mem_mode))
2475 && GET_MODE_SIZE (mem_mode) == -cstepi))
2477 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2478 use->stmt);
2482 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT.
2483 If USE is not NULL, the candidate is set as related to it. The candidate
2484 computation is scheduled on all available positions. */
2486 static void
2487 add_candidate (struct ivopts_data *data,
2488 tree base, tree step, bool important, struct iv_use *use)
2490 if (ip_normal_pos (data->current_loop))
2491 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2492 if (ip_end_pos (data->current_loop)
2493 && allow_ip_end_pos_p (data->current_loop))
2494 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2496 if (use != NULL && use->type == USE_ADDRESS)
2497 add_autoinc_candidates (data, base, step, important, use);
2500 /* Adds standard iv candidates. */
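/* These are the canonical counters 0, 1, 2, ... in the int type, and also
   in the long and long long types as long as they are no wider than a
   machine word.  */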
2502 static void
2503 add_standard_iv_candidates (struct ivopts_data *data)
2505 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2507 /* The same for the long type if it is still fast enough. */
2508 if (TYPE_PRECISION
2509 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2510 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2511 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2512 build_int_cst (long_integer_type_node, 1), true, NULL);
2514 /* The same for a double-integer type if it is still fast enough. */
2515 if (TYPE_PRECISION
2516 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2517 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2518 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2519 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2523 /* Adds candidates based on the old induction variable IV. */
2525 static void
2526 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2528 gimple phi;
2529 tree def;
2530 struct iv_cand *cand;
2532 add_candidate (data, iv->base, iv->step, true, NULL);
2534 /* The same, but with initial value zero. */
2535 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2536 add_candidate (data, size_int (0), iv->step, true, NULL);
2537 else
2538 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2539 iv->step, true, NULL);
2541 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2542 if (gimple_code (phi) == GIMPLE_PHI)
2544 /* Additionally record the possibility of leaving the original iv
2545 untouched. */
2546 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2547 /* Don't add candidate if it's from another PHI node because
2548 it's an affine iv appearing in the form of PEELED_CHREC. */
2549 phi = SSA_NAME_DEF_STMT (def);
2550 if (gimple_code (phi) != GIMPLE_PHI)
2552 cand = add_candidate_1 (data,
2553 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2554 SSA_NAME_DEF_STMT (def));
2555 cand->var_before = iv->ssa_name;
2556 cand->var_after = def;
2558 else
2559 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2563 /* Adds candidates based on the old induction variables. */
2565 static void
2566 add_old_ivs_candidates (struct ivopts_data *data)
2568 unsigned i;
2569 struct iv *iv;
2570 bitmap_iterator bi;
2572 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2574 iv = ver_info (data, i)->iv;
2575 if (iv && iv->biv_p && !integer_zerop (iv->step))
2576 add_old_iv_candidates (data, iv);
2580 /* Adds candidates based on the value of the induction variable IV and USE. */
2582 static void
2583 add_iv_value_candidates (struct ivopts_data *data,
2584 struct iv *iv, struct iv_use *use)
2586 unsigned HOST_WIDE_INT offset;
2587 tree base;
2588 tree basetype;
2590 add_candidate (data, iv->base, iv->step, false, use);
2592 /* The same, but with initial value zero. Make such variable important,
2593 since it is generic enough so that possibly many uses may be based
2594 on it. */
2595 basetype = TREE_TYPE (iv->base);
2596 if (POINTER_TYPE_P (basetype))
2597 basetype = sizetype;
2598 add_candidate (data, build_int_cst (basetype, 0),
2599 iv->step, true, use);
2601 /* Third, try removing the constant offset. Make sure to even
2602 add a candidate for &a[0] vs. (T *)&a. */
2603 base = strip_offset (iv->base, &offset);
2604 if (offset
2605 || base != iv->base)
2606 add_candidate (data, base, iv->step, false, use);
2609 /* Adds candidates based on the uses. */
2611 static void
2612 add_derived_ivs_candidates (struct ivopts_data *data)
2614 unsigned i;
2616 for (i = 0; i < n_iv_uses (data); i++)
2618 struct iv_use *use = iv_use (data, i);
2620 if (!use)
2621 continue;
2623 switch (use->type)
2625 case USE_NONLINEAR_EXPR:
2626 case USE_COMPARE:
2627 case USE_ADDRESS:
2628 /* Just add the ivs based on the value of the iv used here. */
2629 add_iv_value_candidates (data, use->iv, use);
2630 break;
2632 default:
2633 gcc_unreachable ();
2638 /* Record important candidates and add them to related_cands bitmaps
2639 if needed. */
2641 static void
2642 record_important_candidates (struct ivopts_data *data)
2644 unsigned i;
2645 struct iv_use *use;
2647 for (i = 0; i < n_iv_cands (data); i++)
2649 struct iv_cand *cand = iv_cand (data, i);
2651 if (cand->important)
2652 bitmap_set_bit (data->important_candidates, i);
2655 data->consider_all_candidates = (n_iv_cands (data)
2656 <= CONSIDER_ALL_CANDIDATES_BOUND);
2658 if (data->consider_all_candidates)
2660 /* We will not need "related_cands" bitmaps in this case,
2661 so release them to decrease peak memory consumption. */
2662 for (i = 0; i < n_iv_uses (data); i++)
2664 use = iv_use (data, i);
2665 BITMAP_FREE (use->related_cands);
2668 else
2670 /* Add important candidates to the related_cands bitmaps. */
2671 for (i = 0; i < n_iv_uses (data); i++)
2672 bitmap_ior_into (iv_use (data, i)->related_cands,
2673 data->important_candidates);
2677 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2678 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2679 we allocate a small hash table for each use. */
2681 static void
2682 alloc_use_cost_map (struct ivopts_data *data)
2684 unsigned i, size, s;
2686 for (i = 0; i < n_iv_uses (data); i++)
2688 struct iv_use *use = iv_use (data, i);
2690 if (data->consider_all_candidates)
2691 size = n_iv_cands (data);
2692 else
2694 s = bitmap_count_bits (use->related_cands);
2696 /* Round up to the nearest power of two, so that computing modulo it is fast. */
2697 size = s ? (1 << ceil_log2 (s)) : 1;
2700 use->n_map_members = size;
2701 use->cost_map = XCNEWVEC (struct cost_pair, size);
2705 /* Returns description of computation cost of expression whose runtime
2706 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2708 static comp_cost
2709 new_cost (unsigned runtime, unsigned complexity)
2711 comp_cost cost;
2713 cost.cost = runtime;
2714 cost.complexity = complexity;
2716 return cost;
2719 /* Adds costs COST1 and COST2. */
2721 static comp_cost
2722 add_costs (comp_cost cost1, comp_cost cost2)
2724 cost1.cost += cost2.cost;
2725 cost1.complexity += cost2.complexity;
2727 return cost1;
2729 /* Subtracts cost COST2 from COST1. */
2731 static comp_cost
2732 sub_costs (comp_cost cost1, comp_cost cost2)
2734 cost1.cost -= cost2.cost;
2735 cost1.complexity -= cost2.complexity;
2737 return cost1;
2740 /* Returns a negative number if COST1 < COST2, a positive number if
2741 COST1 > COST2, and 0 if COST1 = COST2. */
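/* The runtime cost dominates; complexity is only used to break ties.  */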
2743 static int
2744 compare_costs (comp_cost cost1, comp_cost cost2)
2746 if (cost1.cost == cost2.cost)
2747 return cost1.complexity - cost2.complexity;
2749 return cost1.cost - cost2.cost;
2752 /* Returns true if COST is infinite. */
2754 static bool
2755 infinite_cost_p (comp_cost cost)
2757 return cost.cost == INFTY;
2760 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2761 on invariants DEPENDS_ON and that the value used in expressing it
2762 is VALUE, and in case of iv elimination the comparison operator is COMP. */
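/* If not all candidates are considered, the cost map acts as a small
   open-addressed hash table: the search for CAND starts at slot
   cand->id mod n_map_members and probes linearly, wrapping around once.
   Infinite costs are never recorded, so an empty slot terminates the
   lookup in get_use_iv_cost.  */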
2764 static void
2765 set_use_iv_cost (struct ivopts_data *data,
2766 struct iv_use *use, struct iv_cand *cand,
2767 comp_cost cost, bitmap depends_on, tree value,
2768 enum tree_code comp, int inv_expr_id)
2770 unsigned i, s;
2772 if (infinite_cost_p (cost))
2774 BITMAP_FREE (depends_on);
2775 return;
2778 if (data->consider_all_candidates)
2780 use->cost_map[cand->id].cand = cand;
2781 use->cost_map[cand->id].cost = cost;
2782 use->cost_map[cand->id].depends_on = depends_on;
2783 use->cost_map[cand->id].value = value;
2784 use->cost_map[cand->id].comp = comp;
2785 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2786 return;
2789 /* n_map_members is a power of two, so this computes modulo. */
2790 s = cand->id & (use->n_map_members - 1);
2791 for (i = s; i < use->n_map_members; i++)
2792 if (!use->cost_map[i].cand)
2793 goto found;
2794 for (i = 0; i < s; i++)
2795 if (!use->cost_map[i].cand)
2796 goto found;
2798 gcc_unreachable ();
2800 found:
2801 use->cost_map[i].cand = cand;
2802 use->cost_map[i].cost = cost;
2803 use->cost_map[i].depends_on = depends_on;
2804 use->cost_map[i].value = value;
2805 use->cost_map[i].comp = comp;
2806 use->cost_map[i].inv_expr_id = inv_expr_id;
2809 /* Gets cost of (USE, CANDIDATE) pair. */
2811 static struct cost_pair *
2812 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2813 struct iv_cand *cand)
2815 unsigned i, s;
2816 struct cost_pair *ret;
2818 if (!cand)
2819 return NULL;
2821 if (data->consider_all_candidates)
2823 ret = use->cost_map + cand->id;
2824 if (!ret->cand)
2825 return NULL;
2827 return ret;
2830 /* n_map_members is a power of two, so this computes modulo. */
2831 s = cand->id & (use->n_map_members - 1);
2832 for (i = s; i < use->n_map_members; i++)
2833 if (use->cost_map[i].cand == cand)
2834 return use->cost_map + i;
2835 else if (use->cost_map[i].cand == NULL)
2836 return NULL;
2837 for (i = 0; i < s; i++)
2838 if (use->cost_map[i].cand == cand)
2839 return use->cost_map + i;
2840 else if (use->cost_map[i].cand == NULL)
2841 return NULL;
2843 return NULL;
2846 /* Returns an estimate of the cost of computing SEQ. */
2848 static unsigned
2849 seq_cost (rtx_insn *seq, bool speed)
2851 unsigned cost = 0;
2852 rtx set;
2854 for (; seq; seq = NEXT_INSN (seq))
2856 set = single_set (seq);
2857 if (set)
2858 cost += set_src_cost (SET_SRC (set), speed);
2859 else
2860 cost++;
2863 return cost;
2866 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2867 static rtx
2868 produce_memory_decl_rtl (tree obj, int *regno)
2870 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2871 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2872 rtx x;
2874 gcc_assert (obj);
2875 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2877 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2878 x = gen_rtx_SYMBOL_REF (address_mode, name);
2879 SET_SYMBOL_REF_DECL (x, obj);
2880 x = gen_rtx_MEM (DECL_MODE (obj), x);
2881 set_mem_addr_space (x, as);
2882 targetm.encode_section_info (obj, x, true);
2884 else
2886 x = gen_raw_REG (address_mode, (*regno)++);
2887 x = gen_rtx_MEM (DECL_MODE (obj), x);
2888 set_mem_addr_space (x, as);
2891 return x;
2894 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2895 walk_tree. DATA contains the actual fake register number. */
2897 static tree
2898 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2900 tree obj = NULL_TREE;
2901 rtx x = NULL_RTX;
2902 int *regno = (int *) data;
2904 switch (TREE_CODE (*expr_p))
2906 case ADDR_EXPR:
2907 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2908 handled_component_p (*expr_p);
2909 expr_p = &TREE_OPERAND (*expr_p, 0))
2910 continue;
2911 obj = *expr_p;
2912 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2913 x = produce_memory_decl_rtl (obj, regno);
2914 break;
2916 case SSA_NAME:
2917 *ws = 0;
2918 obj = SSA_NAME_VAR (*expr_p);
2919 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2920 if (!obj)
2921 return NULL_TREE;
2922 if (!DECL_RTL_SET_P (obj))
2923 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2924 break;
2926 case VAR_DECL:
2927 case PARM_DECL:
2928 case RESULT_DECL:
2929 *ws = 0;
2930 obj = *expr_p;
2932 if (DECL_RTL_SET_P (obj))
2933 break;
2935 if (DECL_MODE (obj) == BLKmode)
2936 x = produce_memory_decl_rtl (obj, regno);
2937 else
2938 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2940 break;
2942 default:
2943 break;
2946 if (x)
2948 decl_rtl_to_reset.safe_push (obj);
2949 SET_DECL_RTL (obj, x);
2952 return NULL_TREE;
2955 /* Determines cost of the computation of EXPR. */
2957 static unsigned
2958 computation_cost (tree expr, bool speed)
2960 rtx_insn *seq;
2961 rtx rslt;
2962 tree type = TREE_TYPE (expr);
2963 unsigned cost;
2964 /* Avoid using hard regs in ways which may be unsupported. */
2965 int regno = LAST_VIRTUAL_REGISTER + 1;
2966 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2967 enum node_frequency real_frequency = node->frequency;
2969 node->frequency = NODE_FREQUENCY_NORMAL;
2970 crtl->maybe_hot_insn_p = speed;
2971 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2972 start_sequence ();
2973 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2974 seq = get_insns ();
2975 end_sequence ();
2976 default_rtl_profile ();
2977 node->frequency = real_frequency;
2979 cost = seq_cost (seq, speed);
2980 if (MEM_P (rslt))
2981 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2982 TYPE_ADDR_SPACE (type), speed);
2983 else if (!REG_P (rslt))
2984 cost += set_src_cost (rslt, speed);
2986 return cost;
2989 /* Returns the variable containing the value of candidate CAND at statement AT. */
2991 static tree
2992 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2994 if (stmt_after_increment (loop, cand, stmt))
2995 return cand->var_after;
2996 else
2997 return cand->var_before;
3000 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3001 same precision that is at least as wide as the precision of TYPE, stores
3002 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3003 type of A and B. */
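/* E.g. if *A is (int) la and *B is (int) lb with LA and LB of type long,
   LA and LB are stored back to *A and *B and the long type is returned,
   so that the subtraction can be folded in the wider type.  */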
3005 static tree
3006 determine_common_wider_type (tree *a, tree *b)
3008 tree wider_type = NULL;
3009 tree suba, subb;
3010 tree atype = TREE_TYPE (*a);
3012 if (CONVERT_EXPR_P (*a))
3014 suba = TREE_OPERAND (*a, 0);
3015 wider_type = TREE_TYPE (suba);
3016 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3017 return atype;
3019 else
3020 return atype;
3022 if (CONVERT_EXPR_P (*b))
3024 subb = TREE_OPERAND (*b, 0);
3025 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3026 return atype;
3028 else
3029 return atype;
3031 *a = suba;
3032 *b = subb;
3033 return wider_type;
3036 /* Determines the expression by which USE is expressed from induction variable
3037 CAND at statement AT in LOOP. The expression is stored in a decomposed
3038 form into AFF. Returns false if USE cannot be expressed using CAND. */
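/* The decomposition computed below is

     use = ubase - ratio * cbase + ratio * var,

   where ratio = ustep / cstep must be a compile-time constant; e.g. a
   use with step 4 expressed from a candidate with step 1 has ratio 4.  */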
3040 static bool
3041 get_computation_aff (struct loop *loop,
3042 struct iv_use *use, struct iv_cand *cand, gimple at,
3043 struct aff_tree *aff)
3045 tree ubase = use->iv->base;
3046 tree ustep = use->iv->step;
3047 tree cbase = cand->iv->base;
3048 tree cstep = cand->iv->step, cstep_common;
3049 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3050 tree common_type, var;
3051 tree uutype;
3052 aff_tree cbase_aff, var_aff;
3053 widest_int rat;
3055 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3057 /* We do not have a precision to express the values of the use. */
3058 return false;
3061 var = var_at_stmt (loop, cand, at);
3062 uutype = unsigned_type_for (utype);
3064 /* If the conversion is not noop, perform it. */
3065 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3067 cstep = fold_convert (uutype, cstep);
3068 cbase = fold_convert (uutype, cbase);
3069 var = fold_convert (uutype, var);
3072 if (!constant_multiple_of (ustep, cstep, &rat))
3073 return false;
3075 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3076 type, we achieve better folding by computing their difference in this
3077 wider type, and cast the result to UUTYPE. We do not need to worry about
3078 overflows, as all the arithmetic will in the end be performed in UUTYPE
3079 anyway. */
3080 common_type = determine_common_wider_type (&ubase, &cbase);
3082 /* use = ubase - ratio * cbase + ratio * var. */
3083 tree_to_aff_combination (ubase, common_type, aff);
3084 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3085 tree_to_aff_combination (var, uutype, &var_aff);
3087 /* We need to shift the value if we are after the increment. */
3088 if (stmt_after_increment (loop, cand, at))
3090 aff_tree cstep_aff;
3092 if (common_type != uutype)
3093 cstep_common = fold_convert (common_type, cstep);
3094 else
3095 cstep_common = cstep;
3097 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3098 aff_combination_add (&cbase_aff, &cstep_aff);
3101 aff_combination_scale (&cbase_aff, -rat);
3102 aff_combination_add (aff, &cbase_aff);
3103 if (common_type != uutype)
3104 aff_combination_convert (aff, uutype);
3106 aff_combination_scale (&var_aff, rat);
3107 aff_combination_add (aff, &var_aff);
3109 return true;
3112 /* Return the type of USE. */
3114 static tree
3115 get_use_type (struct iv_use *use)
3117 tree base_type = TREE_TYPE (use->iv->base);
3118 tree type;
3120 if (use->type == USE_ADDRESS)
3122 /* The base_type may be a void pointer. Create a pointer type based on
3123 the mem_ref instead. */
3124 type = build_pointer_type (TREE_TYPE (*use->op_p));
3125 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3126 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3128 else
3129 type = base_type;
3131 return type;
3134 /* Determines the expression by which USE is expressed from induction variable
3135 CAND at statement AT in LOOP. The computation is unshared. */
3137 static tree
3138 get_computation_at (struct loop *loop,
3139 struct iv_use *use, struct iv_cand *cand, gimple at)
3141 aff_tree aff;
3142 tree type = get_use_type (use);
3144 if (!get_computation_aff (loop, use, cand, at, &aff))
3145 return NULL_TREE;
3146 unshare_aff_combination (&aff);
3147 return fold_convert (type, aff_combination_to_tree (&aff));
3150 /* Determines the expression by which USE is expressed from induction variable
3151 CAND in LOOP. The computation is unshared. */
3153 static tree
3154 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3156 return get_computation_at (loop, use, cand, use->stmt);
3159 /* Adjust the cost COST for being in loop setup rather than loop body.
3160 If we're optimizing for space, the loop setup overhead is constant;
3161 if we're optimizing for speed, amortize it over the per-iteration cost. */
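/* E.g. with an average of 10 iterations, a setup cost of 20 counts as 2
   per iteration when optimizing for speed, but stays 20 when optimizing
   for size.  */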
3162 static unsigned
3163 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3165 if (cost == INFTY)
3166 return cost;
3167 else if (optimize_loop_for_speed_p (data->current_loop))
3168 return cost / avg_loop_niter (data->current_loop);
3169 else
3170 return cost;
3173 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3174 validity for a memory reference accessing memory of mode MODE in
3175 address space AS. */
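/* E.g. x86 addressing modes allow index scale factors of 1, 2, 4 and 8,
   so only those ratios are valid there.  The answer is computed once per
   (mode, address space) pair by probing the target and is cached as a
   bitmap over the ratios in [-MAX_RATIO, MAX_RATIO].  */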
3178 bool
3179 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3180 addr_space_t as)
3182 #define MAX_RATIO 128
3183 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3184 static vec<sbitmap> valid_mult_list;
3185 sbitmap valid_mult;
3187 if (data_index >= valid_mult_list.length ())
3188 valid_mult_list.safe_grow_cleared (data_index + 1);
3190 valid_mult = valid_mult_list[data_index];
3191 if (!valid_mult)
3193 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3194 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3195 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3196 rtx addr, scaled;
3197 HOST_WIDE_INT i;
3199 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3200 bitmap_clear (valid_mult);
3201 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3202 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3203 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3205 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3206 if (memory_address_addr_space_p (mode, addr, as)
3207 || memory_address_addr_space_p (mode, scaled, as))
3208 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3211 if (dump_file && (dump_flags & TDF_DETAILS))
3213 fprintf (dump_file, " allowed multipliers:");
3214 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3215 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3216 fprintf (dump_file, " %d", (int) i);
3217 fprintf (dump_file, "\n");
3218 fprintf (dump_file, "\n");
3221 valid_mult_list[data_index] = valid_mult;
3224 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3225 return false;
3227 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3230 /* Returns the cost of an address of the shape symbol + var + OFFSET + RATIO * index.
3231 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3232 variable is omitted. Compute the cost for a memory reference that accesses
3233 a memory location of mode MEM_MODE in address space AS.
3235 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3236 size of MEM_MODE / RATIO) is available. To make this determination, we
3237 look at the size of the increment to be made, which is given in CSTEP.
3238 CSTEP may be zero if the step is unknown.
3239 STMT_AFTER_INC is true iff the statement we're looking at is after the
3240 increment of the original biv.
3242 TODO -- there must be some better way. This all is quite crude. */
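/* The costs below are precomputed once per (mode, address space) pair:
   all 16 combinations of a symbol, a variable, an offset and a scaled
   index being present in the address are expanded and priced via the
   target's address cost hook, as are the auto-increment forms.  */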
3244 enum ainc_type
3246 AINC_PRE_INC, /* Pre increment. */
3247 AINC_PRE_DEC, /* Pre decrement. */
3248 AINC_POST_INC, /* Post increment. */
3249 AINC_POST_DEC, /* Post decrement. */
3250 AINC_NONE /* Also the number of auto increment types. */
3253 typedef struct address_cost_data_s
3255 HOST_WIDE_INT min_offset, max_offset;
3256 unsigned costs[2][2][2][2];
3257 unsigned ainc_costs[AINC_NONE];
3258 } *address_cost_data;
3261 static comp_cost
3262 get_address_cost (bool symbol_present, bool var_present,
3263 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3264 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3265 addr_space_t as, bool speed,
3266 bool stmt_after_inc, bool *may_autoinc)
3268 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3269 static vec<address_cost_data> address_cost_data_list;
3270 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3271 address_cost_data data;
3272 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3273 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3274 unsigned cost, acost, complexity;
3275 enum ainc_type autoinc_type;
3276 bool offset_p, ratio_p, autoinc;
3277 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3278 unsigned HOST_WIDE_INT mask;
3279 unsigned bits;
3281 if (data_index >= address_cost_data_list.length ())
3282 address_cost_data_list.safe_grow_cleared (data_index + 1);
3284 data = address_cost_data_list[data_index];
3285 if (!data)
3287 HOST_WIDE_INT i;
3288 HOST_WIDE_INT rat, off = 0;
3289 int old_cse_not_expected, width;
3290 unsigned sym_p, var_p, off_p, rat_p, add_c;
3291 rtx_insn *seq;
3292 rtx addr, base;
3293 rtx reg0, reg1;
3295 data = (address_cost_data) xcalloc (1, sizeof (*data));
3297 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3299 width = GET_MODE_BITSIZE (address_mode) - 1;
3300 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3301 width = HOST_BITS_PER_WIDE_INT - 1;
3302 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3304 for (i = width; i >= 0; i--)
3306 off = -((unsigned HOST_WIDE_INT) 1 << i);
3307 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3308 if (memory_address_addr_space_p (mem_mode, addr, as))
3309 break;
3311 data->min_offset = (i == -1? 0 : off);
3313 for (i = width; i >= 0; i--)
3315 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3316 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3317 if (memory_address_addr_space_p (mem_mode, addr, as))
3318 break;
3319 /* For some TARGETs, like ARM THUMB1, the offset should be naturally
3320 aligned. Try an aligned offset if address_mode is not QImode. */
3321 off = (address_mode == QImode)
3322 ? 0
3323 : ((unsigned HOST_WIDE_INT) 1 << i)
3324 - GET_MODE_SIZE (address_mode);
3325 if (off > 0)
3327 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3328 if (memory_address_addr_space_p (mem_mode, addr, as))
3329 break;
3332 if (i == -1)
3333 off = 0;
3334 data->max_offset = off;
3336 if (dump_file && (dump_flags & TDF_DETAILS))
3338 fprintf (dump_file, "get_address_cost:\n");
3339 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3340 GET_MODE_NAME (mem_mode),
3341 data->min_offset);
3342 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3343 GET_MODE_NAME (mem_mode),
3344 data->max_offset);
3347 rat = 1;
3348 for (i = 2; i <= MAX_RATIO; i++)
3349 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3351 rat = i;
3352 break;
3355 /* Compute the cost of various addressing modes. */
3356 acost = 0;
3357 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3358 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3360 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3361 || USE_STORE_PRE_DECREMENT (mem_mode))
3363 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3364 has_predec[mem_mode]
3365 = memory_address_addr_space_p (mem_mode, addr, as);
3367 if (has_predec[mem_mode])
3368 data->ainc_costs[AINC_PRE_DEC]
3369 = address_cost (addr, mem_mode, as, speed);
3371 if (USE_LOAD_POST_DECREMENT (mem_mode)
3372 || USE_STORE_POST_DECREMENT (mem_mode))
3374 addr = gen_rtx_POST_DEC (address_mode, reg0);
3375 has_postdec[mem_mode]
3376 = memory_address_addr_space_p (mem_mode, addr, as);
3378 if (has_postdec[mem_mode])
3379 data->ainc_costs[AINC_POST_DEC]
3380 = address_cost (addr, mem_mode, as, speed);
3382 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3383 || USE_STORE_PRE_INCREMENT (mem_mode))
3385 addr = gen_rtx_PRE_INC (address_mode, reg0);
3386 has_preinc[mem_mode]
3387 = memory_address_addr_space_p (mem_mode, addr, as);
3389 if (has_preinc[mem_mode])
3390 data->ainc_costs[AINC_PRE_INC]
3391 = address_cost (addr, mem_mode, as, speed);
3393 if (USE_LOAD_POST_INCREMENT (mem_mode)
3394 || USE_STORE_POST_INCREMENT (mem_mode))
3396 addr = gen_rtx_POST_INC (address_mode, reg0);
3397 has_postinc[mem_mode]
3398 = memory_address_addr_space_p (mem_mode, addr, as);
3400 if (has_postinc[mem_mode])
3401 data->ainc_costs[AINC_POST_INC]
3402 = address_cost (addr, mem_mode, as, speed);
3404 for (i = 0; i < 16; i++)
3406 sym_p = i & 1;
3407 var_p = (i >> 1) & 1;
3408 off_p = (i >> 2) & 1;
3409 rat_p = (i >> 3) & 1;
3411 addr = reg0;
3412 if (rat_p)
3413 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3414 gen_int_mode (rat, address_mode));
3416 if (var_p)
3417 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3419 if (sym_p)
3421 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3422 /* ??? We can run into trouble with some backends by presenting
3423 it with symbols which haven't been properly passed through
3424 targetm.encode_section_info. By setting the local bit, we
3425 enhance the probability of things working. */
3426 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3428 if (off_p)
3429 base = gen_rtx_fmt_e (CONST, address_mode,
3430 gen_rtx_fmt_ee
3431 (PLUS, address_mode, base,
3432 gen_int_mode (off, address_mode)));
3434 else if (off_p)
3435 base = gen_int_mode (off, address_mode);
3436 else
3437 base = NULL_RTX;
3439 if (base)
3440 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3442 start_sequence ();
3443 /* To avoid splitting addressing modes, pretend that no cse will
3444 follow. */
3445 old_cse_not_expected = cse_not_expected;
3446 cse_not_expected = true;
3447 addr = memory_address_addr_space (mem_mode, addr, as);
3448 cse_not_expected = old_cse_not_expected;
3449 seq = get_insns ();
3450 end_sequence ();
3452 acost = seq_cost (seq, speed);
3453 acost += address_cost (addr, mem_mode, as, speed);
3455 if (!acost)
3456 acost = 1;
3457 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3460 /* On some targets, it is quite expensive to load a symbol into a register,
3461 which makes addresses that contain symbols look much more expensive.
3462 However, the symbol will have to be loaded in any case before the
3463 loop (and quite likely we have it in register already), so it does not
3464 make much sense to penalize them too heavily. So make some final
3465 tweaks for the SYMBOL_PRESENT modes:
3467 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3468 var is cheaper, use this mode with small penalty.
3469 If VAR_PRESENT is true, try whether the mode with
3470 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3471 if this is the case, use it. */
3472 add_c = add_cost (speed, address_mode);
3473 for (i = 0; i < 8; i++)
3475 var_p = i & 1;
3476 off_p = (i >> 1) & 1;
3477 rat_p = (i >> 2) & 1;
3479 acost = data->costs[0][1][off_p][rat_p] + 1;
3480 if (var_p)
3481 acost += add_c;
3483 if (acost < data->costs[1][var_p][off_p][rat_p])
3484 data->costs[1][var_p][off_p][rat_p] = acost;
3487 if (dump_file && (dump_flags & TDF_DETAILS))
3489 fprintf (dump_file, "Address costs:\n");
3491 for (i = 0; i < 16; i++)
3493 sym_p = i & 1;
3494 var_p = (i >> 1) & 1;
3495 off_p = (i >> 2) & 1;
3496 rat_p = (i >> 3) & 1;
3498 fprintf (dump_file, " ");
3499 if (sym_p)
3500 fprintf (dump_file, "sym + ");
3501 if (var_p)
3502 fprintf (dump_file, "var + ");
3503 if (off_p)
3504 fprintf (dump_file, "cst + ");
3505 if (rat_p)
3506 fprintf (dump_file, "rat * ");
3508 acost = data->costs[sym_p][var_p][off_p][rat_p];
3509 fprintf (dump_file, "index costs %d\n", acost);
3511 if (has_predec[mem_mode] || has_postdec[mem_mode]
3512 || has_preinc[mem_mode] || has_postinc[mem_mode])
3513 fprintf (dump_file, " May include autoinc/dec\n");
3514 fprintf (dump_file, "\n");
3517 address_cost_data_list[data_index] = data;
3520 bits = GET_MODE_BITSIZE (address_mode);
3521 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3522 offset &= mask;
3523 if ((offset >> (bits - 1) & 1))
3524 offset |= ~mask;
3525 s_offset = offset;
3527 autoinc = false;
3528 autoinc_type = AINC_NONE;
3529 msize = GET_MODE_SIZE (mem_mode);
3530 autoinc_offset = offset;
3531 if (stmt_after_inc)
3532 autoinc_offset += ratio * cstep;
3533 if (symbol_present || var_present || ratio != 1)
3534 autoinc = false;
3535 else
3537 if (has_postinc[mem_mode] && autoinc_offset == 0
3538 && msize == cstep)
3539 autoinc_type = AINC_POST_INC;
3540 else if (has_postdec[mem_mode] && autoinc_offset == 0
3541 && msize == -cstep)
3542 autoinc_type = AINC_POST_DEC;
3543 else if (has_preinc[mem_mode] && autoinc_offset == msize
3544 && msize == cstep)
3545 autoinc_type = AINC_PRE_INC;
3546 else if (has_predec[mem_mode] && autoinc_offset == -msize
3547 && msize == -cstep)
3548 autoinc_type = AINC_PRE_DEC;
3550 if (autoinc_type != AINC_NONE)
3551 autoinc = true;
3554 cost = 0;
3555 offset_p = (s_offset != 0
3556 && data->min_offset <= s_offset
3557 && s_offset <= data->max_offset);
3558 ratio_p = (ratio != 1
3559 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3561 if (ratio != 1 && !ratio_p)
3562 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3564 if (s_offset && !offset_p && !symbol_present)
3565 cost += add_cost (speed, address_mode);
3567 if (may_autoinc)
3568 *may_autoinc = autoinc;
3569 if (autoinc)
3570 acost = data->ainc_costs[autoinc_type];
3571 else
3572 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3573 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3574 return new_cost (cost + acost, complexity);
3577 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3578 the EXPR operand holding the multiplication. COST0 and COST1 are the costs for
3579 calculating the operands of EXPR. Returns true if successful, and returns
3580 the cost in COST. */
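/* E.g. for EXPR == a + b * 4, MULT is the b * 4 operand and m == 2, so the
   cost is that of a shift-add, (b << 2) + a, instead of a multiplication
   followed by an addition.  */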
3582 static bool
3583 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3584 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3586 comp_cost res;
3587 tree op1 = TREE_OPERAND (expr, 1);
3588 tree cst = TREE_OPERAND (mult, 1);
3589 tree multop = TREE_OPERAND (mult, 0);
3590 int m = exact_log2 (int_cst_value (cst));
3591 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3592 int sa_cost;
3593 bool equal_p = false;
3595 if (!(m >= 0 && m < maxm))
3596 return false;
3598 if (operand_equal_p (op1, mult, 0))
3599 equal_p = true;
3601 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3602 ? shiftadd_cost (speed, mode, m)
3603 : (equal_p
3604 ? shiftsub1_cost (speed, mode, m)
3605 : shiftsub0_cost (speed, mode, m)));
3606 res = new_cost (sa_cost, 0);
3607 res = add_costs (res, equal_p ? cost0 : cost1);
3609 STRIP_NOPS (multop);
3610 if (!is_gimple_val (multop))
3611 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3613 *cost = res;
3614 return true;
3617 /* Estimates cost of forcing expression EXPR into a variable. */
3619 static comp_cost
3620 force_expr_to_var_cost (tree expr, bool speed)
3622 static bool costs_initialized = false;
3623 static unsigned integer_cost [2];
3624 static unsigned symbol_cost [2];
3625 static unsigned address_cost [2];
3626 tree op0, op1;
3627 comp_cost cost0, cost1, cost;
3628 enum machine_mode mode;
3630 if (!costs_initialized)
3632 tree type = build_pointer_type (integer_type_node);
3633 tree var, addr;
3634 rtx x;
3635 int i;
3637 var = create_tmp_var_raw (integer_type_node, "test_var");
3638 TREE_STATIC (var) = 1;
3639 x = produce_memory_decl_rtl (var, NULL);
3640 SET_DECL_RTL (var, x);
3642 addr = build1 (ADDR_EXPR, type, var);
3645 for (i = 0; i < 2; i++)
3647 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3648 2000), i);
3650 symbol_cost[i] = computation_cost (addr, i) + 1;
3652 address_cost[i]
3653 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3654 if (dump_file && (dump_flags & TDF_DETAILS))
3656 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3657 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3658 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3659 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3660 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3661 fprintf (dump_file, "\n");
3665 costs_initialized = true;
3668 STRIP_NOPS (expr);
3670 if (SSA_VAR_P (expr))
3671 return no_cost;
3673 if (is_gimple_min_invariant (expr))
3675 if (TREE_CODE (expr) == INTEGER_CST)
3676 return new_cost (integer_cost [speed], 0);
3678 if (TREE_CODE (expr) == ADDR_EXPR)
3680 tree obj = TREE_OPERAND (expr, 0);
3682 if (TREE_CODE (obj) == VAR_DECL
3683 || TREE_CODE (obj) == PARM_DECL
3684 || TREE_CODE (obj) == RESULT_DECL)
3685 return new_cost (symbol_cost [speed], 0);
3688 return new_cost (address_cost [speed], 0);
3691 switch (TREE_CODE (expr))
3693 case POINTER_PLUS_EXPR:
3694 case PLUS_EXPR:
3695 case MINUS_EXPR:
3696 case MULT_EXPR:
3697 op0 = TREE_OPERAND (expr, 0);
3698 op1 = TREE_OPERAND (expr, 1);
3699 STRIP_NOPS (op0);
3700 STRIP_NOPS (op1);
3701 break;
3703 CASE_CONVERT:
3704 case NEGATE_EXPR:
3705 op0 = TREE_OPERAND (expr, 0);
3706 STRIP_NOPS (op0);
3707 op1 = NULL_TREE;
3708 break;
3710 default:
3711 /* Just an arbitrary value, FIXME. */
3712 return new_cost (target_spill_cost[speed], 0);
3715 if (op0 == NULL_TREE
3716 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3717 cost0 = no_cost;
3718 else
3719 cost0 = force_expr_to_var_cost (op0, speed);
3721 if (op1 == NULL_TREE
3722 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3723 cost1 = no_cost;
3724 else
3725 cost1 = force_expr_to_var_cost (op1, speed);
3727 mode = TYPE_MODE (TREE_TYPE (expr));
3728 switch (TREE_CODE (expr))
3730 case POINTER_PLUS_EXPR:
3731 case PLUS_EXPR:
3732 case MINUS_EXPR:
3733 case NEGATE_EXPR:
3734 cost = new_cost (add_cost (speed, mode), 0);
3735 if (TREE_CODE (expr) != NEGATE_EXPR)
3737 tree mult = NULL_TREE;
3738 comp_cost sa_cost;
3739 if (TREE_CODE (op1) == MULT_EXPR)
3740 mult = op1;
3741 else if (TREE_CODE (op0) == MULT_EXPR)
3742 mult = op0;
3744 if (mult != NULL_TREE
3745 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3746 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3747 speed, &sa_cost))
3748 return sa_cost;
3750 break;
3752 CASE_CONVERT:
3754 tree inner_mode, outer_mode;
3755 outer_mode = TREE_TYPE (expr);
3756 inner_mode = TREE_TYPE (op0);
3757 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3758 TYPE_MODE (inner_mode), speed), 0);
3760 break;
3762 case MULT_EXPR:
3763 if (cst_and_fits_in_hwi (op0))
3764 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3765 mode, speed), 0);
3766 else if (cst_and_fits_in_hwi (op1))
3767 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3768 mode, speed), 0);
3769 else
3770 return new_cost (target_spill_cost [speed], 0);
3771 break;
3773 default:
3774 gcc_unreachable ();
3777 cost = add_costs (cost, cost0);
3778 cost = add_costs (cost, cost1);
3780 /* Bound the cost by target_spill_cost. The parts of complicated
3781 computations often are either loop invariant or at least can
3782 be shared between several iv uses, so letting this grow without
3783 limits would not give reasonable results. */
3784 if (cost.cost > (int) target_spill_cost [speed])
3785 cost.cost = target_spill_cost [speed];
3787 return cost;
3790 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3791 invariants the computation depends on. */
3793 static comp_cost
3794 force_var_cost (struct ivopts_data *data,
3795 tree expr, bitmap *depends_on)
3797 if (depends_on)
3799 fd_ivopts_data = data;
3800 walk_tree (&expr, find_depends, depends_on, NULL);
3803 return force_expr_to_var_cost (expr, data->speed);
3806 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3807 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3808 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3809 invariants the computation depends on. */
3811 static comp_cost
3812 split_address_cost (struct ivopts_data *data,
3813 tree addr, bool *symbol_present, bool *var_present,
3814 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3816 tree core;
3817 HOST_WIDE_INT bitsize;
3818 HOST_WIDE_INT bitpos;
3819 tree toffset;
3820 enum machine_mode mode;
3821 int unsignedp, volatilep;
3823 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3824 &unsignedp, &volatilep, false);
3826 if (toffset != 0
3827 || bitpos % BITS_PER_UNIT != 0
3828 || TREE_CODE (core) != VAR_DECL)
3830 *symbol_present = false;
3831 *var_present = true;
3832 fd_ivopts_data = data;
3833 walk_tree (&addr, find_depends, depends_on, NULL);
3834 return new_cost (target_spill_cost[data->speed], 0);
3837 *offset += bitpos / BITS_PER_UNIT;
3838 if (TREE_STATIC (core)
3839 || DECL_EXTERNAL (core))
3841 *symbol_present = true;
3842 *var_present = false;
3843 return no_cost;
3846 *symbol_present = false;
3847 *var_present = true;
3848 return no_cost;
3851 /* Estimates cost of expressing difference of addresses E1 - E2 as
3852 var + symbol + offset. The value of offset is added to OFFSET,
3853 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3854 part is missing. DEPENDS_ON is a set of the invariants the computation
3855 depends on. */
3857 static comp_cost
3858 ptr_difference_cost (struct ivopts_data *data,
3859 tree e1, tree e2, bool *symbol_present, bool *var_present,
3860 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3862 HOST_WIDE_INT diff = 0;
3863 aff_tree aff_e1, aff_e2;
3864 tree type;
3866 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3868 if (ptr_difference_const (e1, e2, &diff))
3870 *offset += diff;
3871 *symbol_present = false;
3872 *var_present = false;
3873 return no_cost;
3876 if (integer_zerop (e2))
3877 return split_address_cost (data, TREE_OPERAND (e1, 0),
3878 symbol_present, var_present, offset, depends_on);
3880 *symbol_present = false;
3881 *var_present = true;
3883 type = signed_type_for (TREE_TYPE (e1));
3884 tree_to_aff_combination (e1, type, &aff_e1);
3885 tree_to_aff_combination (e2, type, &aff_e2);
3886 aff_combination_scale (&aff_e2, -1);
3887 aff_combination_add (&aff_e1, &aff_e2);
3889 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3892 /* Estimates cost of expressing difference E1 - E2 as
3893 var + symbol + offset. The value of offset is added to OFFSET,
3894 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3895 part is missing. DEPENDS_ON is a set of the invariants the computation
3896 depends on. */
3898 static comp_cost
3899 difference_cost (struct ivopts_data *data,
3900 tree e1, tree e2, bool *symbol_present, bool *var_present,
3901 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3903 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3904 unsigned HOST_WIDE_INT off1, off2;
3905 aff_tree aff_e1, aff_e2;
3906 tree type;
3908 e1 = strip_offset (e1, &off1);
3909 e2 = strip_offset (e2, &off2);
3910 *offset += off1 - off2;
3912 STRIP_NOPS (e1);
3913 STRIP_NOPS (e2);
3915 if (TREE_CODE (e1) == ADDR_EXPR)
3916 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3917 offset, depends_on);
3918 *symbol_present = false;
3920 if (operand_equal_p (e1, e2, 0))
3922 *var_present = false;
3923 return no_cost;
3926 *var_present = true;
3928 if (integer_zerop (e2))
3929 return force_var_cost (data, e1, depends_on);
3931 if (integer_zerop (e1))
3933 comp_cost cost = force_var_cost (data, e2, depends_on);
3934 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3935 return cost;
3938 type = signed_type_for (TREE_TYPE (e1));
3939 tree_to_aff_combination (e1, type, &aff_e1);
3940 tree_to_aff_combination (e2, type, &aff_e2);
3941 aff_combination_scale (&aff_e2, -1);
3942 aff_combination_add (&aff_e1, &aff_e2);
3944 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3947 /* Returns true if AFF1 and AFF2 are identical. */
3949 static bool
3950 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3952 unsigned i;
3954 if (aff1->n != aff2->n)
3955 return false;
3957 for (i = 0; i < aff1->n; i++)
3959 if (aff1->elts[i].coef != aff2->elts[i].coef)
3960 return false;
3962 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3963 return false;
3965 return true;
3968 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3970 static int
3971 get_expr_id (struct ivopts_data *data, tree expr)
3973 struct iv_inv_expr_ent ent;
3974 struct iv_inv_expr_ent **slot;
3976 ent.expr = expr;
3977 ent.hash = iterative_hash_expr (expr, 0);
3978 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3979 if (*slot)
3980 return (*slot)->id;
3982 *slot = XNEW (struct iv_inv_expr_ent);
3983 (*slot)->expr = expr;
3984 (*slot)->hash = ent.hash;
3985 (*slot)->id = data->inv_expr_id++;
3986 return (*slot)->id;
3989 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3990 requires a new compiler generated temporary. Returns -1 otherwise.
3991 ADDRESS_P is a flag indicating if the expression is for address
3992 computation. */
3994 static int
3995 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3996 tree cbase, HOST_WIDE_INT ratio,
3997 bool address_p)
3999 aff_tree ubase_aff, cbase_aff;
4000 tree expr, ub, cb;
4002 STRIP_NOPS (ubase);
4003 STRIP_NOPS (cbase);
4004 ub = ubase;
4005 cb = cbase;
4007 if ((TREE_CODE (ubase) == INTEGER_CST)
4008 && (TREE_CODE (cbase) == INTEGER_CST))
4009 return -1;
4011 /* Strips the constant part. */
4012 if (TREE_CODE (ubase) == PLUS_EXPR
4013 || TREE_CODE (ubase) == MINUS_EXPR
4014 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4016 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4017 ubase = TREE_OPERAND (ubase, 0);
4020 /* Strips the constant part. */
4021 if (TREE_CODE (cbase) == PLUS_EXPR
4022 || TREE_CODE (cbase) == MINUS_EXPR
4023 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4025 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4026 cbase = TREE_OPERAND (cbase, 0);
4029 if (address_p)
4031 if (((TREE_CODE (ubase) == SSA_NAME)
4032 || (TREE_CODE (ubase) == ADDR_EXPR
4033 && is_gimple_min_invariant (ubase)))
4034 && (TREE_CODE (cbase) == INTEGER_CST))
4035 return -1;
4037 if (((TREE_CODE (cbase) == SSA_NAME)
4038 || (TREE_CODE (cbase) == ADDR_EXPR
4039 && is_gimple_min_invariant (cbase)))
4040 && (TREE_CODE (ubase) == INTEGER_CST))
4041 return -1;
4044 if (ratio == 1)
4046 if (operand_equal_p (ubase, cbase, 0))
4047 return -1;
4049 if (TREE_CODE (ubase) == ADDR_EXPR
4050 && TREE_CODE (cbase) == ADDR_EXPR)
4052 tree usym, csym;
4054 usym = TREE_OPERAND (ubase, 0);
4055 csym = TREE_OPERAND (cbase, 0);
4056 if (TREE_CODE (usym) == ARRAY_REF)
4058 tree ind = TREE_OPERAND (usym, 1);
4059 if (TREE_CODE (ind) == INTEGER_CST
4060 && tree_fits_shwi_p (ind)
4061 && tree_to_shwi (ind) == 0)
4062 usym = TREE_OPERAND (usym, 0);
4064 if (TREE_CODE (csym) == ARRAY_REF)
4066 tree ind = TREE_OPERAND (csym, 1);
4067 if (TREE_CODE (ind) == INTEGER_CST
4068 && tree_fits_shwi_p (ind)
4069 && tree_to_shwi (ind) == 0)
4070 csym = TREE_OPERAND (csym, 0);
4072 if (operand_equal_p (usym, csym, 0))
4073 return -1;
4075 /* Now do a more complex comparison. */
4076 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4077 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4078 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4079 return -1;
4082 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4083 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4085 aff_combination_scale (&cbase_aff, -1 * ratio);
4086 aff_combination_add (&ubase_aff, &cbase_aff);
4087 expr = aff_combination_to_tree (&ubase_aff);
4088 return get_expr_id (data, expr);
4093 /* Determines the cost of the computation by which USE is expressed
4094 from induction variable CAND. If ADDRESS_P is true, we just need
4095 to create an address from it, otherwise we want to get it into a
4096 register. A set of invariants we depend on is stored in
4097 DEPENDS_ON. AT is the statement at which the value is computed.
4098 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4099 addressing is likely. */
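/* The cost is obtained by decomposing the computation into the shape

     symbol + var1 + const + ratio * var2

   and pricing the parts separately; see the individual cases below.  */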
4101 static comp_cost
4102 get_computation_cost_at (struct ivopts_data *data,
4103 struct iv_use *use, struct iv_cand *cand,
4104 bool address_p, bitmap *depends_on, gimple at,
4105 bool *can_autoinc,
4106 int *inv_expr_id)
4108 tree ubase = use->iv->base, ustep = use->iv->step;
4109 tree cbase, cstep;
4110 tree utype = TREE_TYPE (ubase), ctype;
4111 unsigned HOST_WIDE_INT cstepi, offset = 0;
4112 HOST_WIDE_INT ratio, aratio;
4113 bool var_present, symbol_present, stmt_is_after_inc;
4114 comp_cost cost;
4115 widest_int rat;
4116 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4117 enum machine_mode mem_mode = (address_p
4118 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4119 : VOIDmode);
4121 *depends_on = NULL;
4123 /* Only consider real candidates. */
4124 if (!cand->iv)
4125 return infinite_cost;
4127 cbase = cand->iv->base;
4128 cstep = cand->iv->step;
4129 ctype = TREE_TYPE (cbase);
4131 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4133 /* We do not have a precision to express the values of the use. */
4134 return infinite_cost;
4137 if (address_p
4138 || (use->iv->base_object
4139 && cand->iv->base_object
4140 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4141 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4143 /* Do not try to express address of an object with computation based
4144 on address of a different object. This may cause problems in rtl
4145 level alias analysis (that does not expect this to be happening,
4146 as this is illegal in C), and would be unlikely to be useful
4147 anyway. */
4148 if (use->iv->base_object
4149 && cand->iv->base_object
4150 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4151 return infinite_cost;
4154 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4156 /* TODO -- add direct handling of this case. */
4157 goto fallback;
4160 /* CSTEPI is removed from the offset in case the statement is after the
4161 increment. If the step is not constant, we use zero instead.
4162 This is a bit imprecise (there is the extra addition), but
4163 redundancy elimination is likely to transform the code so that
4164 it uses the value of the variable before the increment anyway,
4165 so this is not that unrealistic. */
4166 if (cst_and_fits_in_hwi (cstep))
4167 cstepi = int_cst_value (cstep);
4168 else
4169 cstepi = 0;
4171 if (!constant_multiple_of (ustep, cstep, &rat))
4172 return infinite_cost;
4174 if (wi::fits_shwi_p (rat))
4175 ratio = rat.to_shwi ();
4176 else
4177 return infinite_cost;
4179 STRIP_NOPS (cbase);
4180 ctype = TREE_TYPE (cbase);
4182 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4184 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4185 or ratio == 1, it is better to handle this like
4187 ubase - ratio * cbase + ratio * var
4189 (this also holds for ratio == -1, TODO). */
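/* For instance, with ubase = 16, cbase = 4 and ratio = 2, the use value
   16 + 2 * (var - 4) becomes the loop-invariant part 16 - 2 * 4 = 8 plus
   2 * var, leaving only one multiplication and one addition per use. */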
4191 if (cst_and_fits_in_hwi (cbase))
4193 offset = - ratio * int_cst_value (cbase);
4194 cost = difference_cost (data,
4195 ubase, build_int_cst (utype, 0),
4196 &symbol_present, &var_present, &offset,
4197 depends_on);
4198 cost.cost /= avg_loop_niter (data->current_loop);
4200 else if (ratio == 1)
4202 tree real_cbase = cbase;
4204 /* Check to see if any adjustment is needed. */
4205 if (cstepi == 0 && stmt_is_after_inc)
4207 aff_tree real_cbase_aff;
4208 aff_tree cstep_aff;
4210 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4211 &real_cbase_aff);
4212 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4214 aff_combination_add (&real_cbase_aff, &cstep_aff);
4215 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4218 cost = difference_cost (data,
4219 ubase, real_cbase,
4220 &symbol_present, &var_present, &offset,
4221 depends_on);
4222 cost.cost /= avg_loop_niter (data->current_loop);
4224 else if (address_p
4225 && !POINTER_TYPE_P (ctype)
4226 && multiplier_allowed_in_address_p
4227 (ratio, mem_mode,
4228 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4230 cbase
4231 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4232 cost = difference_cost (data,
4233 ubase, cbase,
4234 &symbol_present, &var_present, &offset,
4235 depends_on);
4236 cost.cost /= avg_loop_niter (data->current_loop);
4238 else
4240 cost = force_var_cost (data, cbase, depends_on);
4241 cost = add_costs (cost,
4242 difference_cost (data,
4243 ubase, build_int_cst (utype, 0),
4244 &symbol_present, &var_present,
4245 &offset, depends_on));
4246 cost.cost /= avg_loop_niter (data->current_loop);
4247 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4250 if (inv_expr_id)
4252 *inv_expr_id =
4253 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4254 /* Clear depends on. */
4255 if (*inv_expr_id != -1 && depends_on && *depends_on)
4256 bitmap_clear (*depends_on);
4259 /* If we are after the increment, the value of the candidate is higher by
4260 one iteration. */
4261 if (stmt_is_after_inc)
4262 offset -= ratio * cstepi;
4264 /* Now the computation is in the shape symbol + var1 + const + ratio * var2
4265 (the symbol/var1/const parts may be omitted). If we are looking for an
4266 address, find the cost of addressing this. */
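/* E.g. a use of the shape &sym + i + 4 + 2 * j has the symbol part &sym,
   invariant part i, constant offset 4 and ratio 2; whether that whole
   combination forms a single valid address is a target property, so the
   address cost is queried separately below. */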
4267 if (address_p)
4268 return add_costs (cost,
4269 get_address_cost (symbol_present, var_present,
4270 offset, ratio, cstepi,
4271 mem_mode,
4272 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4273 speed, stmt_is_after_inc,
4274 can_autoinc));
4276 /* Otherwise estimate the costs for computing the expression. */
4277 if (!symbol_present && !var_present && !offset)
4279 if (ratio != 1)
4280 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4281 return cost;
4284 /* Symbol + offset should be compile-time computable so consider that they
4285 are added once to the variable, if present. */
4286 if (var_present && (symbol_present || offset))
4287 cost.cost += adjust_setup_cost (data,
4288 add_cost (speed, TYPE_MODE (ctype)));
4290 /* Having offset does not affect runtime cost in case it is added to
4291 symbol, but it increases complexity. */
4292 if (offset)
4293 cost.complexity++;
4295 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4297 aratio = ratio > 0 ? ratio : -ratio;
4298 if (aratio != 1)
4299 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4300 return cost;
4302 fallback:
4303 if (can_autoinc)
4304 *can_autoinc = false;
4307 /* Just get the expression, expand it and measure the cost. */
4308 tree comp = get_computation_at (data->current_loop, use, cand, at);
4310 if (!comp)
4311 return infinite_cost;
4313 if (address_p)
4314 comp = build_simple_mem_ref (comp);
4316 return new_cost (computation_cost (comp, speed), 0);
4320 /* Determines the cost of the computation by which USE is expressed
4321 from induction variable CAND. If ADDRESS_P is true, we just need
4322 to create an address from it, otherwise we want to get it into a
4323 register. A set of invariants we depend on is stored in
4324 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4325 autoinc addressing is likely. */
4327 static comp_cost
4328 get_computation_cost (struct ivopts_data *data,
4329 struct iv_use *use, struct iv_cand *cand,
4330 bool address_p, bitmap *depends_on,
4331 bool *can_autoinc, int *inv_expr_id)
4333 return get_computation_cost_at (data,
4334 use, cand, address_p, depends_on, use->stmt,
4335 can_autoinc, inv_expr_id);
4338 /* Determines cost of basing replacement of USE on CAND in a generic
4339 expression. */
4341 static bool
4342 determine_use_iv_cost_generic (struct ivopts_data *data,
4343 struct iv_use *use, struct iv_cand *cand)
4345 bitmap depends_on;
4346 comp_cost cost;
4347 int inv_expr_id = -1;
4349 /* The simple case first -- if we need to express value of the preserved
4350 original biv, the cost is 0. This also prevents us from counting the
4351 cost of increment twice -- once at this use and once in the cost of
4352 the candidate. */
4353 if (cand->pos == IP_ORIGINAL
4354 && cand->incremented_at == use->stmt)
4356 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4357 ERROR_MARK, -1);
4358 return true;
4361 cost = get_computation_cost (data, use, cand, false, &depends_on,
4362 NULL, &inv_expr_id);
4364 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4365 inv_expr_id);
4367 return !infinite_cost_p (cost);
4370 /* Determines cost of basing replacement of USE on CAND in an address. */
4372 static bool
4373 determine_use_iv_cost_address (struct ivopts_data *data,
4374 struct iv_use *use, struct iv_cand *cand)
4376 bitmap depends_on;
4377 bool can_autoinc;
4378 int inv_expr_id = -1;
4379 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4380 &can_autoinc, &inv_expr_id);
4382 if (cand->ainc_use == use)
4384 if (can_autoinc)
4385 cost.cost -= cand->cost_step;
4386 /* If we generated the candidate solely for exploiting autoincrement
4387 opportunities, and it turns out it can't be used, set the cost to
4388 infinity to make sure we ignore it. */
4389 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4390 cost = infinite_cost;
4392 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4393 inv_expr_id);
4395 return !infinite_cost_p (cost);
4398 /* Computes the value of candidate CAND at position AT in iteration NITER, and
4399 stores it in VAL. */
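/* For instance, a candidate with base 0 and step 4 has the value
   0 + 10 * 4 = 40 for NITER = 10, or 0 + (10 + 1) * 4 = 44 when AT is
   already past the increment of the candidate. */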
4401 static void
4402 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4403 aff_tree *val)
4405 aff_tree step, delta, nit;
4406 struct iv *iv = cand->iv;
4407 tree type = TREE_TYPE (iv->base);
4408 tree steptype = unsigned_type_for (type);
4413 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4414 aff_combination_convert (&step, steptype);
4415 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4416 aff_combination_convert (&nit, steptype);
4417 aff_combination_mult (&nit, &step, &delta);
4418 if (stmt_after_increment (loop, cand, at))
4419 aff_combination_add (&delta, &step);
4421 tree_to_aff_combination (iv->base, type, val);
4422 if (!POINTER_TYPE_P (type))
4423 aff_combination_convert (val, steptype);
4424 aff_combination_add (val, &delta);
4427 /* Returns the period of induction variable IV. */
4429 static tree
4430 iv_period (struct iv *iv)
4432 tree step = iv->step, period, type;
4433 tree pow2div;
4435 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4437 type = unsigned_type_for (TREE_TYPE (step));
4438 /* The period of the iv is lcm (step, type_range) / step - 1,
4439 i.e., N * type_range / step - 1, where N = step >> num_ending_zeros (step).
4440 Since type_range is a power of two, the final result is
4443 (type_range >> num_ending_zeros (step)) - 1. */
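/* For instance, for step = 4 in a 32-bit unsigned type, num_ending_zeros
   is 2 and the period is (2^32 >> 2) - 1 = 0x3fffffff: that many
   increments are guaranteed not to wrap the iv back to an already
   visited value. */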
4446 pow2div = num_ending_zeros (step);
4448 period = build_low_bits_mask (type,
4449 (TYPE_PRECISION (type)
4450 - tree_to_uhwi (pow2div)));
4452 return period;
4455 /* Returns the comparison operator used when eliminating the iv USE. */
4457 static enum tree_code
4458 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4460 struct loop *loop = data->current_loop;
4461 basic_block ex_bb;
4462 edge exit;
4464 ex_bb = gimple_bb (use->stmt);
4465 exit = EDGE_SUCC (ex_bb, 0);
4466 if (flow_bb_inside_loop_p (loop, exit->dest))
4467 exit = EDGE_SUCC (ex_bb, 1);
4469 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4472 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4473 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4474 calculation is performed in a non-wrapping type.
4476 TODO: More generally, we could test for the situation that
4477 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4478 This would require knowing the sign of OFFSET. */
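/* A minimal instance of the pattern matched below: for BASE defined as
   p_0 = base + a and OFFSET equal to a (in a non-wrapping type),
   BASE - OFFSET is just base, so the subtraction cannot overflow. */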
4480 static bool
4481 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4483 enum tree_code code;
4484 tree e1, e2;
4485 aff_tree aff_e1, aff_e2, aff_offset;
4487 if (!nowrap_type_p (TREE_TYPE (base)))
4488 return false;
4490 base = expand_simple_operations (base);
4492 if (TREE_CODE (base) == SSA_NAME)
4494 gimple stmt = SSA_NAME_DEF_STMT (base);
4496 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4497 return false;
4499 code = gimple_assign_rhs_code (stmt);
4500 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4501 return false;
4503 e1 = gimple_assign_rhs1 (stmt);
4504 e2 = gimple_assign_rhs2 (stmt);
4506 else
4508 code = TREE_CODE (base);
4509 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4510 return false;
4511 e1 = TREE_OPERAND (base, 0);
4512 e2 = TREE_OPERAND (base, 1);
4515 /* Use affine expansion for a deeper inspection to prove the equality. */
4516 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4517 &aff_e2, &data->name_expansion_cache);
4518 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4519 &aff_offset, &data->name_expansion_cache);
4520 aff_combination_scale (&aff_offset, -1);
4521 switch (code)
4523 case PLUS_EXPR:
4524 aff_combination_add (&aff_e2, &aff_offset);
4525 if (aff_combination_zero_p (&aff_e2))
4526 return true;
4528 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4529 &aff_e1, &data->name_expansion_cache);
4530 aff_combination_add (&aff_e1, &aff_offset);
4531 return aff_combination_zero_p (&aff_e1);
4533 case POINTER_PLUS_EXPR:
4534 aff_combination_add (&aff_e2, &aff_offset);
4535 return aff_combination_zero_p (&aff_e2);
4537 default:
4538 return false;
4542 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4543 comparison with CAND. NITER describes the number of iterations of
4544 the loops. If successful, the comparison in COMP_P is altered accordingly.
4546 We aim to handle the following situation:
4548 sometype *base, *p;
4549 int a, b, i;
4551 i = a;
4552 p = p_0 = base + a;
4554 do
4555 {
4556 bla (*p);
4557 p++;
4558 i++;
4559 }
4560 while (i < b);
4562 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4563 We aim to optimize this to
4565 p = p_0 = base + a;
4566 do
4567 {
4568 bla (*p);
4569 p++;
4570 }
4571 while (p < p_0 - a + b);
4573 This preserves correctness, since the pointer arithmetic does not
4574 overflow. More precisely:
4576 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4577 overflow in computing it or the values of p.
4578 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4579 overflow. To prove this, we use the fact that p_0 = base + a. */
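/* Concretely, with a = 2 and b = 5 the original loop body runs for
   i = 2, 3, 4, and the rewritten exit test p < p_0 - 2 + 5 = base + 5
   likewise lets the body run for p = base + 2, base + 3 and base + 4. */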
4581 static bool
4582 iv_elimination_compare_lt (struct ivopts_data *data,
4583 struct iv_cand *cand, enum tree_code *comp_p,
4584 struct tree_niter_desc *niter)
4586 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4587 aff_tree nit, tmpa, tmpb;
4588 enum tree_code comp;
4589 HOST_WIDE_INT step;
4591 /* We need to know that the candidate induction variable does not overflow.
4592 While more complex analysis may be used to prove this, for now just
4593 check that the variable appears in the original program and that it
4594 is computed in a type that guarantees no overflows. */
4595 cand_type = TREE_TYPE (cand->iv->base);
4596 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4597 return false;
4599 /* Make sure that the loop iterates until the loop bound is hit, as otherwise
4600 the calculation of the BOUND could overflow, making the comparison
4601 invalid. */
4602 if (!data->loop_single_exit_p)
4603 return false;
4605 /* We need to be able to decide whether the candidate is increasing or
4606 decreasing in order to choose the right comparison operator. */
4607 if (!cst_and_fits_in_hwi (cand->iv->step))
4608 return false;
4609 step = int_cst_value (cand->iv->step);
4611 /* Check that the number of iterations matches the expected pattern:
4612 a + 1 > b ? 0 : b - a - 1. */
4613 mbz = niter->may_be_zero;
4614 if (TREE_CODE (mbz) == GT_EXPR)
4616 /* Handle a + 1 > b. */
4617 tree op0 = TREE_OPERAND (mbz, 0);
4618 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4620 a = TREE_OPERAND (op0, 0);
4621 b = TREE_OPERAND (mbz, 1);
4623 else
4624 return false;
4626 else if (TREE_CODE (mbz) == LT_EXPR)
4628 tree op1 = TREE_OPERAND (mbz, 1);
4630 /* Handle b < a + 1. */
4631 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4633 a = TREE_OPERAND (op1, 0);
4634 b = TREE_OPERAND (mbz, 0);
4636 else
4637 return false;
4639 else
4640 return false;
4642 /* Expected number of iterations is B - A - 1. Check that it matches
4643 the actual number, i.e., that B - A - NITER = 1. */
4644 tree_to_aff_combination (niter->niter, nit_type, &nit);
4645 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4646 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4647 aff_combination_scale (&nit, -1);
4648 aff_combination_scale (&tmpa, -1);
4649 aff_combination_add (&tmpb, &tmpa);
4650 aff_combination_add (&tmpb, &nit);
4651 if (tmpb.n != 0 || tmpb.offset != 1)
4652 return false;
4654 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4655 overflow. */
4656 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4657 cand->iv->step,
4658 fold_convert (TREE_TYPE (cand->iv->step), a));
4659 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4660 return false;
4662 /* Determine the new comparison operator. */
4663 comp = step < 0 ? GT_EXPR : LT_EXPR;
4664 if (*comp_p == NE_EXPR)
4665 *comp_p = comp;
4666 else if (*comp_p == EQ_EXPR)
4667 *comp_p = invert_tree_comparison (comp, false);
4668 else
4669 gcc_unreachable ();
4671 return true;
4674 /* Check whether it is possible to express the condition in USE by comparison
4675 of candidate CAND. If so, store the value compared with to BOUND, and the
4676 comparison operator to COMP. */
4678 static bool
4679 may_eliminate_iv (struct ivopts_data *data,
4680 struct iv_use *use, struct iv_cand *cand, tree *bound,
4681 enum tree_code *comp)
4683 basic_block ex_bb;
4684 edge exit;
4685 tree period;
4686 struct loop *loop = data->current_loop;
4687 aff_tree bnd;
4688 struct tree_niter_desc *desc = NULL;
4690 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4691 return false;
4693 /* For now works only for exits that dominate the loop latch.
4694 TODO: extend to other conditions inside loop body. */
4695 ex_bb = gimple_bb (use->stmt);
4696 if (use->stmt != last_stmt (ex_bb)
4697 || gimple_code (use->stmt) != GIMPLE_COND
4698 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4699 return false;
4701 exit = EDGE_SUCC (ex_bb, 0);
4702 if (flow_bb_inside_loop_p (loop, exit->dest))
4703 exit = EDGE_SUCC (ex_bb, 1);
4704 if (flow_bb_inside_loop_p (loop, exit->dest))
4705 return false;
4707 desc = niter_for_exit (data, exit);
4708 if (!desc)
4709 return false;
4711 /* Determine whether we can use the variable to test the exit condition.
4712 This is the case iff the period of the induction variable is greater
4713 than the number of iterations for which the exit condition is true. */
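/* E.g. a candidate with step 1 in a 32-bit unsigned type has period
   2^32 - 1, so whenever the number of iterations is known to be smaller
   than that, the candidate hits BOUND for the first time in the last
   iteration and the exit test may compare against BOUND directly. */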
4714 period = iv_period (cand->iv);
4716 /* If the number of iterations is constant, compare against it directly. */
4717 if (TREE_CODE (desc->niter) == INTEGER_CST)
4719 /* See cand_value_at. */
4720 if (stmt_after_increment (loop, cand, use->stmt))
4722 if (!tree_int_cst_lt (desc->niter, period))
4723 return false;
4725 else
4727 if (tree_int_cst_lt (period, desc->niter))
4728 return false;
4732 /* If not, and if this is the only possible exit of the loop, see whether
4733 we can get a conservative estimate on the number of iterations of the
4734 entire loop and compare against that instead. */
4735 else
4737 widest_int period_value, max_niter;
4739 max_niter = desc->max;
4740 if (stmt_after_increment (loop, cand, use->stmt))
4741 max_niter += 1;
4742 period_value = wi::to_widest (period);
4743 if (wi::gtu_p (max_niter, period_value))
4745 /* See if we can take advantage of inferred loop bound information. */
4746 if (data->loop_single_exit_p)
4748 if (!max_loop_iterations (loop, &max_niter))
4749 return false;
4750 /* The loop bound is already adjusted by adding 1. */
4751 if (wi::gtu_p (max_niter, period_value))
4752 return false;
4754 else
4755 return false;
4759 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4761 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4762 aff_combination_to_tree (&bnd));
4763 *comp = iv_elimination_compare (data, use);
4765 /* It is unlikely that computing the number of iterations using division
4766 would be more profitable than keeping the original induction variable. */
4767 if (expression_expensive_p (*bound))
4768 return false;
4770 /* Sometimes it is possible to handle the situation that the number of
4771 iterations may be zero unless additional assumptions hold, by using <
4772 instead of != in the exit condition.
4774 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4775 base the exit condition on it. However, that is often too
4776 expensive. */
4777 if (!integer_zerop (desc->may_be_zero))
4778 return iv_elimination_compare_lt (data, cand, comp, desc);
4780 return true;
4783 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4784 be copied, if it is used in the loop body and DATA->body_includes_call. */
4786 static int
4787 parm_decl_cost (struct ivopts_data *data, tree bound)
4789 tree sbound = bound;
4790 STRIP_NOPS (sbound);
4792 if (TREE_CODE (sbound) == SSA_NAME
4793 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4794 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4795 && data->body_includes_call)
4796 return COSTS_N_INSNS (1);
4798 return 0;
4801 /* Determines cost of basing replacement of USE on CAND in a condition. */
4803 static bool
4804 determine_use_iv_cost_condition (struct ivopts_data *data,
4805 struct iv_use *use, struct iv_cand *cand)
4807 tree bound = NULL_TREE;
4808 struct iv *cmp_iv;
4809 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4810 comp_cost elim_cost, express_cost, cost, bound_cost;
4811 bool ok;
4812 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4813 tree *control_var, *bound_cst;
4814 enum tree_code comp = ERROR_MARK;
4816 /* Only consider real candidates. */
4817 if (!cand->iv)
4819 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4820 ERROR_MARK, -1);
4821 return false;
4824 /* Try iv elimination. */
4825 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4827 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4828 if (elim_cost.cost == 0)
4829 elim_cost.cost = parm_decl_cost (data, bound);
4830 else if (TREE_CODE (bound) == INTEGER_CST)
4831 elim_cost.cost = 0;
4832 /* If we replace a loop condition 'i < n' with 'p < base + n',
4833 depends_on_elim will have 'base' and 'n' set, which implies
4834 that both 'base' and 'n' will be live during the loop. More likely,
4835 'base + n' will be loop invariant, resulting in only one live value
4836 during the loop. So in that case we clear depends_on_elim and set
4837 elim_inv_expr_id instead. */
4838 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4840 elim_inv_expr_id = get_expr_id (data, bound);
4841 bitmap_clear (depends_on_elim);
4843 /* The bound is a loop invariant, so it will be computed
4844 only once. */
4845 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4847 else
4848 elim_cost = infinite_cost;
4850 /* Try expressing the original giv. If it is compared with an invariant,
4851 note that we cannot get rid of it. */
4852 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4853 NULL, &cmp_iv);
4854 gcc_assert (ok);
4856 /* When the condition is a comparison of the candidate IV against
4857 zero, prefer this IV.
4859 TODO: The constant that we're subtracting from the cost should
4860 be target-dependent. This information should be added to the
4861 target costs for each backend. */
4862 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4863 && integer_zerop (*bound_cst)
4864 && (operand_equal_p (*control_var, cand->var_after, 0)
4865 || operand_equal_p (*control_var, cand->var_before, 0)))
4866 elim_cost.cost -= 1;
4868 express_cost = get_computation_cost (data, use, cand, false,
4869 &depends_on_express, NULL,
4870 &express_inv_expr_id);
4871 fd_ivopts_data = data;
4872 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4874 /* Count the cost of the original bound as well. */
4875 bound_cost = force_var_cost (data, *bound_cst, NULL);
4876 if (bound_cost.cost == 0)
4877 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4878 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4879 bound_cost.cost = 0;
4880 express_cost.cost += bound_cost.cost;
4882 /* Choose the better approach, preferring the eliminated IV. */
4883 if (compare_costs (elim_cost, express_cost) <= 0)
4885 cost = elim_cost;
4886 depends_on = depends_on_elim;
4887 depends_on_elim = NULL;
4888 inv_expr_id = elim_inv_expr_id;
4890 else
4892 cost = express_cost;
4893 depends_on = depends_on_express;
4894 depends_on_express = NULL;
4895 bound = NULL_TREE;
4896 comp = ERROR_MARK;
4897 inv_expr_id = express_inv_expr_id;
4900 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4902 if (depends_on_elim)
4903 BITMAP_FREE (depends_on_elim);
4904 if (depends_on_express)
4905 BITMAP_FREE (depends_on_express);
4907 return !infinite_cost_p (cost);
4910 /* Determines cost of basing replacement of USE on CAND. Returns false
4911 if USE cannot be based on CAND. */
4913 static bool
4914 determine_use_iv_cost (struct ivopts_data *data,
4915 struct iv_use *use, struct iv_cand *cand)
4917 switch (use->type)
4919 case USE_NONLINEAR_EXPR:
4920 return determine_use_iv_cost_generic (data, use, cand);
4922 case USE_ADDRESS:
4923 return determine_use_iv_cost_address (data, use, cand);
4925 case USE_COMPARE:
4926 return determine_use_iv_cost_condition (data, use, cand);
4928 default:
4929 gcc_unreachable ();
4933 /* Return true if get_computation_cost indicates that autoincrement is
4934 a possibility for the pair of USE and CAND, false otherwise. */
4936 static bool
4937 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4938 struct iv_cand *cand)
4940 bitmap depends_on;
4941 bool can_autoinc;
4942 comp_cost cost;
4944 if (use->type != USE_ADDRESS)
4945 return false;
4947 cost = get_computation_cost (data, use, cand, true, &depends_on,
4948 &can_autoinc, NULL);
4950 BITMAP_FREE (depends_on);
4952 return !infinite_cost_p (cost) && can_autoinc;
4955 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4956 use that allows autoincrement, and set their AINC_USE if possible. */
4958 static void
4959 set_autoinc_for_original_candidates (struct ivopts_data *data)
4961 unsigned i, j;
4963 for (i = 0; i < n_iv_cands (data); i++)
4965 struct iv_cand *cand = iv_cand (data, i);
4966 struct iv_use *closest_before = NULL;
4967 struct iv_use *closest_after = NULL;
4968 if (cand->pos != IP_ORIGINAL)
4969 continue;
4971 for (j = 0; j < n_iv_uses (data); j++)
4973 struct iv_use *use = iv_use (data, j);
4974 unsigned uid = gimple_uid (use->stmt);
4976 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4977 continue;
4979 if (uid < gimple_uid (cand->incremented_at)
4980 && (closest_before == NULL
4981 || uid > gimple_uid (closest_before->stmt)))
4982 closest_before = use;
4984 if (uid > gimple_uid (cand->incremented_at)
4985 && (closest_after == NULL
4986 || uid < gimple_uid (closest_after->stmt)))
4987 closest_after = use;
4990 if (closest_before != NULL
4991 && autoinc_possible_for_pair (data, closest_before, cand))
4992 cand->ainc_use = closest_before;
4993 else if (closest_after != NULL
4994 && autoinc_possible_for_pair (data, closest_after, cand))
4995 cand->ainc_use = closest_after;
4999 /* Finds the candidates for the induction variables. */
5001 static void
5002 find_iv_candidates (struct ivopts_data *data)
5004 /* Add commonly used ivs. */
5005 add_standard_iv_candidates (data);
5007 /* Add old induction variables. */
5008 add_old_ivs_candidates (data);
5010 /* Add induction variables derived from uses. */
5011 add_derived_ivs_candidates (data);
5013 set_autoinc_for_original_candidates (data);
5015 /* Record the important candidates. */
5016 record_important_candidates (data);
5019 /* Determines costs of basing the use of the iv on an iv candidate. */
5021 static void
5022 determine_use_iv_costs (struct ivopts_data *data)
5024 unsigned i, j;
5025 struct iv_use *use;
5026 struct iv_cand *cand;
5027 bitmap to_clear = BITMAP_ALLOC (NULL);
5029 alloc_use_cost_map (data);
5031 for (i = 0; i < n_iv_uses (data); i++)
5033 use = iv_use (data, i);
5035 if (data->consider_all_candidates)
5037 for (j = 0; j < n_iv_cands (data); j++)
5039 cand = iv_cand (data, j);
5040 determine_use_iv_cost (data, use, cand);
5043 else
5045 bitmap_iterator bi;
5047 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5049 cand = iv_cand (data, j);
5050 if (!determine_use_iv_cost (data, use, cand))
5051 bitmap_set_bit (to_clear, j);
5054 /* Remove the candidates for which the cost is infinite from
5055 the list of related candidates. */
5056 bitmap_and_compl_into (use->related_cands, to_clear);
5057 bitmap_clear (to_clear);
5061 BITMAP_FREE (to_clear);
5063 if (dump_file && (dump_flags & TDF_DETAILS))
5065 fprintf (dump_file, "Use-candidate costs:\n");
5067 for (i = 0; i < n_iv_uses (data); i++)
5069 use = iv_use (data, i);
5071 fprintf (dump_file, "Use %d:\n", i);
5072 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5073 for (j = 0; j < use->n_map_members; j++)
5075 if (!use->cost_map[j].cand
5076 || infinite_cost_p (use->cost_map[j].cost))
5077 continue;
5079 fprintf (dump_file, " %d\t%d\t%d\t",
5080 use->cost_map[j].cand->id,
5081 use->cost_map[j].cost.cost,
5082 use->cost_map[j].cost.complexity);
5083 if (use->cost_map[j].depends_on)
5084 bitmap_print (dump_file,
5085 use->cost_map[j].depends_on, "","");
5086 if (use->cost_map[j].inv_expr_id != -1)
5087 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5088 fprintf (dump_file, "\n");
5091 fprintf (dump_file, "\n");
5093 fprintf (dump_file, "\n");
5097 /* Determines cost of the candidate CAND. */
5099 static void
5100 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5102 comp_cost cost_base;
5103 unsigned cost, cost_step;
5104 tree base;
5106 if (!cand->iv)
5108 cand->cost = 0;
5109 return;
5112 /* There are two costs associated with the candidate -- its increment
5113 and its initialization. The latter is almost negligible for any loop
5114 that rolls enough, so we take it into account only a little. */
5116 base = cand->iv->base;
5117 cost_base = force_var_cost (data, base, NULL);
5118 /* It will be exceptional that the iv register happens to be initialized with
5119 the proper value at no cost. In general, there will at least be a regcopy
5120 or a const set. */
5121 if (cost_base.cost == 0)
5122 cost_base.cost = COSTS_N_INSNS (1);
5123 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5125 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5127 /* Prefer the original ivs unless we may gain something by replacing them.
5128 The reason is to make debugging simpler; this is not relevant for
5129 artificial ivs created by other optimization passes. */
5130 if (cand->pos != IP_ORIGINAL
5131 || !SSA_NAME_VAR (cand->var_before)
5132 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5133 cost++;
5135 /* Prefer not to insert statements into latch unless there are some
5136 already (so that we do not create unnecessary jumps). */
5137 if (cand->pos == IP_END
5138 && empty_block_p (ip_end_pos (data->current_loop)))
5139 cost++;
5141 cand->cost = cost;
5142 cand->cost_step = cost_step;
5145 /* Determines costs of computation of the candidates. */
5147 static void
5148 determine_iv_costs (struct ivopts_data *data)
5150 unsigned i;
5152 if (dump_file && (dump_flags & TDF_DETAILS))
5154 fprintf (dump_file, "Candidate costs:\n");
5155 fprintf (dump_file, " cand\tcost\n");
5158 for (i = 0; i < n_iv_cands (data); i++)
5160 struct iv_cand *cand = iv_cand (data, i);
5162 determine_iv_cost (data, cand);
5164 if (dump_file && (dump_flags & TDF_DETAILS))
5165 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5168 if (dump_file && (dump_flags & TDF_DETAILS))
5169 fprintf (dump_file, "\n");
5172 /* Calculates cost for having SIZE induction variables. */
5174 static unsigned
5175 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5177 /* We add size to the cost, so that we prefer eliminating ivs
5178 if possible. */
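/* Thus a set of, say, 3 ivs is charged 3 plus whatever penalty
   estimate_reg_pressure_cost predicts for 3 more live registers on top of
   DATA->regs_used; the SIZE term added below is what breaks ties in favor
   of fewer induction variables. */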
5179 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5180 data->body_includes_call);
5183 /* For each size of the induction variable set determine the penalty. */
5185 static void
5186 determine_set_costs (struct ivopts_data *data)
5188 unsigned j, n;
5189 gimple_phi phi;
5190 gimple_phi_iterator psi;
5191 tree op;
5192 struct loop *loop = data->current_loop;
5193 bitmap_iterator bi;
5195 if (dump_file && (dump_flags & TDF_DETAILS))
5197 fprintf (dump_file, "Global costs:\n");
5198 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5199 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5200 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5201 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5204 n = 0;
5205 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5207 phi = psi.phi ();
5208 op = PHI_RESULT (phi);
5210 if (virtual_operand_p (op))
5211 continue;
5213 if (get_iv (data, op))
5214 continue;
5216 n++;
5219 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5221 struct version_info *info = ver_info (data, j);
5223 if (info->inv_id && info->has_nonlin_use)
5224 n++;
5227 data->regs_used = n;
5228 if (dump_file && (dump_flags & TDF_DETAILS))
5229 fprintf (dump_file, " regs_used %d\n", n);
5231 if (dump_file && (dump_flags & TDF_DETAILS))
5233 fprintf (dump_file, " cost for size:\n");
5234 fprintf (dump_file, " ivs\tcost\n");
5235 for (j = 0; j <= 2 * target_avail_regs; j++)
5236 fprintf (dump_file, " %d\t%d\n", j,
5237 ivopts_global_cost_for_size (data, j));
5238 fprintf (dump_file, "\n");
5242 /* Returns true if A is a cheaper cost pair than B. */
5244 static bool
5245 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5247 int cmp;
5249 if (!a)
5250 return false;
5252 if (!b)
5253 return true;
5255 cmp = compare_costs (a->cost, b->cost);
5256 if (cmp < 0)
5257 return true;
5259 if (cmp > 0)
5260 return false;
5262 /* In case the costs are the same, prefer the cheaper candidate. */
5263 if (a->cand->cost < b->cand->cost)
5264 return true;
5266 return false;
5270 /* Returns the candidate by which USE is expressed in IVS. */
5272 static struct cost_pair *
5273 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5275 return ivs->cand_for_use[use->id];
5278 /* Computes the cost field of the IVS structure. */
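/* I.e. the total is cand_use_cost + cand_cost
   + ivopts_global_cost_for_size (n_regs + num_used_inv_expr): the cost of
   expressing every use, the cost of maintaining the candidates themselves,
   and the register pressure of the candidates plus the live invariant
   expressions. */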
5280 static void
5281 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5283 comp_cost cost = ivs->cand_use_cost;
5285 cost.cost += ivs->cand_cost;
5287 cost.cost += ivopts_global_cost_for_size (data,
5288 ivs->n_regs + ivs->num_used_inv_expr);
5290 ivs->cost = cost;
5293 /* Remove invariants in set INVS from set IVS. */
5295 static void
5296 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5298 bitmap_iterator bi;
5299 unsigned iid;
5301 if (!invs)
5302 return;
5304 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5306 ivs->n_invariant_uses[iid]--;
5307 if (ivs->n_invariant_uses[iid] == 0)
5308 ivs->n_regs--;
5312 /* Set USE not to be expressed by any candidate in IVS. */
5314 static void
5315 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5316 struct iv_use *use)
5318 unsigned uid = use->id, cid;
5319 struct cost_pair *cp;
5321 cp = ivs->cand_for_use[uid];
5322 if (!cp)
5323 return;
5324 cid = cp->cand->id;
5326 ivs->bad_uses++;
5327 ivs->cand_for_use[uid] = NULL;
5328 ivs->n_cand_uses[cid]--;
5330 if (ivs->n_cand_uses[cid] == 0)
5332 bitmap_clear_bit (ivs->cands, cid);
5333 /* Do not count the pseudocandidates. */
5334 if (cp->cand->iv)
5335 ivs->n_regs--;
5336 ivs->n_cands--;
5337 ivs->cand_cost -= cp->cand->cost;
5339 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5342 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5344 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5346 if (cp->inv_expr_id != -1)
5348 ivs->used_inv_expr[cp->inv_expr_id]--;
5349 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5350 ivs->num_used_inv_expr--;
5352 iv_ca_recount_cost (data, ivs);
5355 /* Add invariants in set INVS to set IVS. */
5357 static void
5358 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5360 bitmap_iterator bi;
5361 unsigned iid;
5363 if (!invs)
5364 return;
5366 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5368 ivs->n_invariant_uses[iid]++;
5369 if (ivs->n_invariant_uses[iid] == 1)
5370 ivs->n_regs++;
5374 /* Set cost pair for USE in set IVS to CP. */
5376 static void
5377 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5378 struct iv_use *use, struct cost_pair *cp)
5380 unsigned uid = use->id, cid;
5382 if (ivs->cand_for_use[uid] == cp)
5383 return;
5385 if (ivs->cand_for_use[uid])
5386 iv_ca_set_no_cp (data, ivs, use);
5388 if (cp)
5390 cid = cp->cand->id;
5392 ivs->bad_uses--;
5393 ivs->cand_for_use[uid] = cp;
5394 ivs->n_cand_uses[cid]++;
5395 if (ivs->n_cand_uses[cid] == 1)
5397 bitmap_set_bit (ivs->cands, cid);
5398 /* Do not count the pseudocandidates. */
5399 if (cp->cand->iv)
5400 ivs->n_regs++;
5401 ivs->n_cands++;
5402 ivs->cand_cost += cp->cand->cost;
5404 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5407 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5408 iv_ca_set_add_invariants (ivs, cp->depends_on);
5410 if (cp->inv_expr_id != -1)
5412 ivs->used_inv_expr[cp->inv_expr_id]++;
5413 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5414 ivs->num_used_inv_expr++;
5416 iv_ca_recount_cost (data, ivs);
5420 /* Extend set IVS by expressing USE by some of the candidates in it
5421 if possible. Consider all important candidates if candidates in
5422 set IVS don't give any result. */
5424 static void
5425 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5426 struct iv_use *use)
5428 struct cost_pair *best_cp = NULL, *cp;
5429 bitmap_iterator bi;
5430 unsigned i;
5431 struct iv_cand *cand;
5433 gcc_assert (ivs->upto >= use->id);
5434 ivs->upto++;
5435 ivs->bad_uses++;
5437 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5439 cand = iv_cand (data, i);
5440 cp = get_use_iv_cost (data, use, cand);
5441 if (cheaper_cost_pair (cp, best_cp))
5442 best_cp = cp;
5445 if (best_cp == NULL)
5447 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5449 cand = iv_cand (data, i);
5450 cp = get_use_iv_cost (data, use, cand);
5451 if (cheaper_cost_pair (cp, best_cp))
5452 best_cp = cp;
5456 iv_ca_set_cp (data, ivs, use, best_cp);
5459 /* Get cost for assignment IVS. */
5461 static comp_cost
5462 iv_ca_cost (struct iv_ca *ivs)
5464 /* This was a conditional expression but it triggered a bug in
5465 Sun C 5.5. */
5466 if (ivs->bad_uses)
5467 return infinite_cost;
5468 else
5469 return ivs->cost;
5472 /* Returns true if all dependences of CP are among invariants in IVS. */
5474 static bool
5475 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5477 unsigned i;
5478 bitmap_iterator bi;
5480 if (!cp->depends_on)
5481 return true;
5483 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5485 if (ivs->n_invariant_uses[i] == 0)
5486 return false;
5489 return true;
5492 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP, and
5493 chains it before NEXT_CHANGE. */
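/* The deltas therefore form a LIFO chain with the most recent change at
   its head, which is why iv_ca_delta_commit walks the list as-is to apply
   it and reverses it first in order to undo it. */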
5495 static struct iv_ca_delta *
5496 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5497 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5499 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5501 change->use = use;
5502 change->old_cp = old_cp;
5503 change->new_cp = new_cp;
5504 change->next_change = next_change;
5506 return change;
5509 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5510 are rewritten. */
5512 static struct iv_ca_delta *
5513 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5515 struct iv_ca_delta *last;
5517 if (!l2)
5518 return l1;
5520 if (!l1)
5521 return l2;
5523 for (last = l1; last->next_change; last = last->next_change)
5524 continue;
5525 last->next_change = l2;
5527 return l1;
5530 /* Reverse the list of changes DELTA, forming the inverse to it. */
5532 static struct iv_ca_delta *
5533 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5535 struct iv_ca_delta *act, *next, *prev = NULL;
5536 struct cost_pair *tmp;
5538 for (act = delta; act; act = next)
5540 next = act->next_change;
5541 act->next_change = prev;
5542 prev = act;
5544 tmp = act->old_cp;
5545 act->old_cp = act->new_cp;
5546 act->new_cp = tmp;
5549 return prev;
5552 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5553 reverted instead. */
5555 static void
5556 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5557 struct iv_ca_delta *delta, bool forward)
5559 struct cost_pair *from, *to;
5560 struct iv_ca_delta *act;
5562 if (!forward)
5563 delta = iv_ca_delta_reverse (delta);
5565 for (act = delta; act; act = act->next_change)
5567 from = act->old_cp;
5568 to = act->new_cp;
5569 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5570 iv_ca_set_cp (data, ivs, act->use, to);
5573 if (!forward)
5574 iv_ca_delta_reverse (delta);
5577 /* Returns true if CAND is used in IVS. */
5579 static bool
5580 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5582 return ivs->n_cand_uses[cand->id] > 0;
5585 /* Returns number of induction variable candidates in the set IVS. */
5587 static unsigned
5588 iv_ca_n_cands (struct iv_ca *ivs)
5590 return ivs->n_cands;
5593 /* Free the list of changes DELTA. */
5595 static void
5596 iv_ca_delta_free (struct iv_ca_delta **delta)
5598 struct iv_ca_delta *act, *next;
5600 for (act = *delta; act; act = next)
5602 next = act->next_change;
5603 free (act);
5606 *delta = NULL;
5609 /* Allocates a new iv candidate assignment. */
5611 static struct iv_ca *
5612 iv_ca_new (struct ivopts_data *data)
5614 struct iv_ca *nw = XNEW (struct iv_ca);
5616 nw->upto = 0;
5617 nw->bad_uses = 0;
5618 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5619 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5620 nw->cands = BITMAP_ALLOC (NULL);
5621 nw->n_cands = 0;
5622 nw->n_regs = 0;
5623 nw->cand_use_cost = no_cost;
5624 nw->cand_cost = 0;
5625 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5626 nw->cost = no_cost;
5627 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5628 nw->num_used_inv_expr = 0;
5630 return nw;
5633 /* Free memory occupied by the set IVS. */
5635 static void
5636 iv_ca_free (struct iv_ca **ivs)
5638 free ((*ivs)->cand_for_use);
5639 free ((*ivs)->n_cand_uses);
5640 BITMAP_FREE ((*ivs)->cands);
5641 free ((*ivs)->n_invariant_uses);
5642 free ((*ivs)->used_inv_expr);
5643 free (*ivs);
5644 *ivs = NULL;
5647 /* Dumps IVS to FILE. */
5649 static void
5650 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5652 const char *pref = " invariants ";
5653 unsigned i;
5654 comp_cost cost = iv_ca_cost (ivs);
5656 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5657 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5658 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5659 bitmap_print (file, ivs->cands, " candidates: ","\n");
5661 for (i = 0; i < ivs->upto; i++)
5663 struct iv_use *use = iv_use (data, i);
5664 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5665 if (cp)
5666 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5667 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5668 else
5669 fprintf (file, " use:%d --> ??\n", use->id);
5672 for (i = 1; i <= data->max_inv_id; i++)
5673 if (ivs->n_invariant_uses[i])
5675 fprintf (file, "%s%d", pref, i);
5676 pref = ", ";
5678 fprintf (file, "\n\n");
5681 /* Try changing the candidate in IVS to CAND for each use. Return the cost
5682 of the new set, and store the differences in DELTA. The number of induction
5683 variables in the new set is stored to N_IVS. MIN_NCAND is a flag; when it is
5684 true, the function tries to find a solution with a minimal number of iv candidates. */
5686 static comp_cost
5687 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5688 struct iv_cand *cand, struct iv_ca_delta **delta,
5689 unsigned *n_ivs, bool min_ncand)
5691 unsigned i;
5692 comp_cost cost;
5693 struct iv_use *use;
5694 struct cost_pair *old_cp, *new_cp;
5696 *delta = NULL;
5697 for (i = 0; i < ivs->upto; i++)
5699 use = iv_use (data, i);
5700 old_cp = iv_ca_cand_for_use (ivs, use);
5702 if (old_cp
5703 && old_cp->cand == cand)
5704 continue;
5706 new_cp = get_use_iv_cost (data, use, cand);
5707 if (!new_cp)
5708 continue;
5710 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5711 continue;
5713 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5714 continue;
5716 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5719 iv_ca_delta_commit (data, ivs, *delta, true);
5720 cost = iv_ca_cost (ivs);
5721 if (n_ivs)
5722 *n_ivs = iv_ca_n_cands (ivs);
5723 iv_ca_delta_commit (data, ivs, *delta, false);
5725 return cost;
5728 /* Try narrowing set IVS by removing CAND. Return the cost of
5729 the new set and store the differences in DELTA. START is
5730 the candidate with which we start narrowing. */
5732 static comp_cost
5733 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5734 struct iv_cand *cand, struct iv_cand *start,
5735 struct iv_ca_delta **delta)
5737 unsigned i, ci;
5738 struct iv_use *use;
5739 struct cost_pair *old_cp, *new_cp, *cp;
5740 bitmap_iterator bi;
5741 struct iv_cand *cnd;
5742 comp_cost cost, best_cost, acost;
5744 *delta = NULL;
5745 for (i = 0; i < n_iv_uses (data); i++)
5747 use = iv_use (data, i);
5749 old_cp = iv_ca_cand_for_use (ivs, use);
5750 if (old_cp->cand != cand)
5751 continue;
5753 best_cost = iv_ca_cost (ivs);
5754 /* Start narrowing with START. */
5755 new_cp = get_use_iv_cost (data, use, start);
5757 if (data->consider_all_candidates)
5759 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5761 if (ci == cand->id || (start && ci == start->id))
5762 continue;
5764 cnd = iv_cand (data, ci);
5766 cp = get_use_iv_cost (data, use, cnd);
5767 if (!cp)
5768 continue;
5770 iv_ca_set_cp (data, ivs, use, cp);
5771 acost = iv_ca_cost (ivs);
5773 if (compare_costs (acost, best_cost) < 0)
5775 best_cost = acost;
5776 new_cp = cp;
5780 else
5782 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5784 if (ci == cand->id || (start && ci == start->id))
5785 continue;
5787 cnd = iv_cand (data, ci);
5789 cp = get_use_iv_cost (data, use, cnd);
5790 if (!cp)
5791 continue;
5793 iv_ca_set_cp (data, ivs, use, cp);
5794 acost = iv_ca_cost (ivs);
5796 if (compare_costs (acost, best_cost) < 0)
5798 best_cost = acost;
5799 new_cp = cp;
5803 /* Restore to old cp for use. */
5804 iv_ca_set_cp (data, ivs, use, old_cp);
5806 if (!new_cp)
5808 iv_ca_delta_free (delta);
5809 return infinite_cost;
5812 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5815 iv_ca_delta_commit (data, ivs, *delta, true);
5816 cost = iv_ca_cost (ivs);
5817 iv_ca_delta_commit (data, ivs, *delta, false);
5819 return cost;
5822 /* Try optimizing the set of candidates IVS by removing candidates other
5823 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5824 differences in DELTA. */
5826 static comp_cost
5827 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5828 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5830 bitmap_iterator bi;
5831 struct iv_ca_delta *act_delta, *best_delta;
5832 unsigned i;
5833 comp_cost best_cost, acost;
5834 struct iv_cand *cand;
5836 best_delta = NULL;
5837 best_cost = iv_ca_cost (ivs);
5839 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5841 cand = iv_cand (data, i);
5843 if (cand == except_cand)
5844 continue;
5846 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5848 if (compare_costs (acost, best_cost) < 0)
5850 best_cost = acost;
5851 iv_ca_delta_free (&best_delta);
5852 best_delta = act_delta;
5854 else
5855 iv_ca_delta_free (&act_delta);
5858 if (!best_delta)
5860 *delta = NULL;
5861 return best_cost;
5864 /* Recurse to possibly remove other unnecessary ivs. */
5865 iv_ca_delta_commit (data, ivs, best_delta, true);
5866 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5867 iv_ca_delta_commit (data, ivs, best_delta, false);
5868 *delta = iv_ca_delta_join (best_delta, *delta);
5869 return best_cost;
5872 /* Tries to extend the set IVS in the best possible way in order
5873 to express the USE. If ORIGINALP is true, prefer candidates from
5874 the original set of IVs, otherwise favor important candidates not
5875 based on any memory object. */
5877 static bool
5878 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5879 struct iv_use *use, bool originalp)
5881 comp_cost best_cost, act_cost;
5882 unsigned i;
5883 bitmap_iterator bi;
5884 struct iv_cand *cand;
5885 struct iv_ca_delta *best_delta = NULL, *act_delta;
5886 struct cost_pair *cp;
5888 iv_ca_add_use (data, ivs, use);
5889 best_cost = iv_ca_cost (ivs);
5890 cp = iv_ca_cand_for_use (ivs, use);
5891 if (cp)
5893 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5894 iv_ca_set_no_cp (data, ivs, use);
5897 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5898 first try important candidates not based on any memory object. Only if
5899 this fails, try the specific ones. Rationale -- in loops with many
5900 variables the best choice often is to use just one generic biv. If we
5901 added here many ivs specific to the uses, the optimization algorithm later
5902 would be likely to get stuck in a local minimum, thus causing us to create
5903 too many ivs. The approach from few ivs to more seems more likely to be
5904 successful -- starting from few ivs, replacing an expensive use by a
5905 specific iv should always be a win. */
5906 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5908 cand = iv_cand (data, i);
5910 if (originalp && cand->pos != IP_ORIGINAL)
5911 continue;
5913 if (!originalp && cand->iv->base_object != NULL_TREE)
5914 continue;
5916 if (iv_ca_cand_used_p (ivs, cand))
5917 continue;
5919 cp = get_use_iv_cost (data, use, cand);
5920 if (!cp)
5921 continue;
5923 iv_ca_set_cp (data, ivs, use, cp);
5924 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5925 true);
5926 iv_ca_set_no_cp (data, ivs, use);
5927 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5929 if (compare_costs (act_cost, best_cost) < 0)
5931 best_cost = act_cost;
5933 iv_ca_delta_free (&best_delta);
5934 best_delta = act_delta;
5936 else
5937 iv_ca_delta_free (&act_delta);
5940 if (infinite_cost_p (best_cost))
5942 for (i = 0; i < use->n_map_members; i++)
5944 cp = use->cost_map + i;
5945 cand = cp->cand;
5946 if (!cand)
5947 continue;
5949 /* Already tried this. */
5950 if (cand->important)
5952 if (originalp && cand->pos == IP_ORIGINAL)
5953 continue;
5954 if (!originalp && cand->iv->base_object == NULL_TREE)
5955 continue;
5958 if (iv_ca_cand_used_p (ivs, cand))
5959 continue;
5961 act_delta = NULL;
5962 iv_ca_set_cp (data, ivs, use, cp);
5963 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5964 iv_ca_set_no_cp (data, ivs, use);
5965 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5966 cp, act_delta);
5968 if (compare_costs (act_cost, best_cost) < 0)
5970 best_cost = act_cost;
5972 if (best_delta)
5973 iv_ca_delta_free (&best_delta);
5974 best_delta = act_delta;
5976 else
5977 iv_ca_delta_free (&act_delta);
5981 iv_ca_delta_commit (data, ivs, best_delta, true);
5982 iv_ca_delta_free (&best_delta);
5984 return !infinite_cost_p (best_cost);
5987 /* Finds an initial assignment of candidates to uses. */
5989 static struct iv_ca *
5990 get_initial_solution (struct ivopts_data *data, bool originalp)
5992 struct iv_ca *ivs = iv_ca_new (data);
5993 unsigned i;
5995 for (i = 0; i < n_iv_uses (data); i++)
5996 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5998 iv_ca_free (&ivs);
5999 return NULL;
6002 return ivs;
6005 /* Tries to improve the set of induction variables IVS. */
6007 static bool
6008 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6010 unsigned i, n_ivs;
6011 comp_cost acost, best_cost = iv_ca_cost (ivs);
6012 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6013 struct iv_cand *cand;
6015 /* Try extending the set of induction variables by one. */
6016 for (i = 0; i < n_iv_cands (data); i++)
6018 cand = iv_cand (data, i);
6020 if (iv_ca_cand_used_p (ivs, cand))
6021 continue;
6023 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6024 if (!act_delta)
6025 continue;
6027 /* If we successfully added the candidate and the set is small enough,
6028 try optimizing it by removing other candidates. */
6029 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6031 iv_ca_delta_commit (data, ivs, act_delta, true);
6032 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6033 iv_ca_delta_commit (data, ivs, act_delta, false);
6034 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6037 if (compare_costs (acost, best_cost) < 0)
6039 best_cost = acost;
6040 iv_ca_delta_free (&best_delta);
6041 best_delta = act_delta;
6043 else
6044 iv_ca_delta_free (&act_delta);
6047 if (!best_delta)
6049 /* Try removing the candidates from the set instead. */
6050 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6052 /* Nothing more we can do. */
6053 if (!best_delta)
6054 return false;
6057 iv_ca_delta_commit (data, ivs, best_delta, true);
6058 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6059 iv_ca_delta_free (&best_delta);
6060 return true;
6063 /* Attempts to find the optimal set of induction variables. We use a simple
6064 greedy heuristic -- we try to replace at most one candidate in the selected
6065 solution and remove the unused ivs while this improves the cost. */
6067 static struct iv_ca *
6068 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6070 struct iv_ca *set;
6072 /* Get the initial solution. */
6073 set = get_initial_solution (data, originalp);
6074 if (!set)
6076 if (dump_file && (dump_flags & TDF_DETAILS))
6077 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6078 return NULL;
6081 if (dump_file && (dump_flags & TDF_DETAILS))
6083 fprintf (dump_file, "Initial set of candidates:\n");
6084 iv_ca_dump (data, dump_file, set);
6087 while (try_improve_iv_set (data, set))
6089 if (dump_file && (dump_flags & TDF_DETAILS))
6091 fprintf (dump_file, "Improved to:\n");
6092 iv_ca_dump (data, dump_file, set);
6096 return set;
6099 static struct iv_ca *
6100 find_optimal_iv_set (struct ivopts_data *data)
6102 unsigned i;
6103 struct iv_ca *set, *origset;
6104 struct iv_use *use;
6105 comp_cost cost, origcost;
6107 /* Determine the cost based on a strategy that starts with original IVs,
6108 then try again using a strategy that prefers candidates not based
6109 on any IVs. */
6110 origset = find_optimal_iv_set_1 (data, true);
6111 set = find_optimal_iv_set_1 (data, false);
6113 if (!origset && !set)
6114 return NULL;
6116 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6117 cost = set ? iv_ca_cost (set) : infinite_cost;
6119 if (dump_file && (dump_flags & TDF_DETAILS))
6121 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6122 origcost.cost, origcost.complexity);
6123 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6124 cost.cost, cost.complexity);
6127 /* Choose the one with the best cost. */
6128 if (compare_costs (origcost, cost) <= 0)
6130 if (set)
6131 iv_ca_free (&set);
6132 set = origset;
6134 else if (origset)
6135 iv_ca_free (&origset);
6137 for (i = 0; i < n_iv_uses (data); i++)
6139 use = iv_use (data, i);
6140 use->selected = iv_ca_cand_for_use (set, use)->cand;
6143 return set;
6146 /* Creates a new induction variable corresponding to CAND. */
6148 static void
6149 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6151 gimple_stmt_iterator incr_pos;
6152 tree base;
6153 bool after = false;
6155 if (!cand->iv)
6156 return;
6158 switch (cand->pos)
6160 case IP_NORMAL:
6161 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6162 break;
6164 case IP_END:
6165 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6166 after = true;
6167 break;
6169 case IP_AFTER_USE:
6170 after = true;
6171 /* fall through */
6172 case IP_BEFORE_USE:
6173 incr_pos = gsi_for_stmt (cand->incremented_at);
6174 break;
6176 case IP_ORIGINAL:
6177 /* Mark that the iv is preserved. */
6178 name_info (data, cand->var_before)->preserve_biv = true;
6179 name_info (data, cand->var_after)->preserve_biv = true;
6181 /* Rewrite the increment so that it uses var_before directly. */
6182 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6183 return;
6186 gimple_add_tmp_var (cand->var_before);
6188 base = unshare_expr (cand->iv->base);
6190 create_iv (base, unshare_expr (cand->iv->step),
6191 cand->var_before, data->current_loop,
6192 &incr_pos, after, &cand->var_before, &cand->var_after);
6195 /* Creates new induction variables described in SET. */
6197 static void
6198 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6200 unsigned i;
6201 struct iv_cand *cand;
6202 bitmap_iterator bi;
6204 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6206 cand = iv_cand (data, i);
6207 create_new_iv (data, cand);
6210 if (dump_file && (dump_flags & TDF_DETAILS))
6212 fprintf (dump_file, "\nSelected IV set: \n");
6213 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6215 cand = iv_cand (data, i);
6216 dump_cand (dump_file, cand);
6218 fprintf (dump_file, "\n");
6222 /* Rewrites USE (definition of iv used in a nonlinear expression)
6223 using candidate CAND. */
6225 static void
6226 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6227 struct iv_use *use, struct iv_cand *cand)
6229 tree comp;
6230 tree op, tgt;
6231 gimple_assign ass;
6232 gimple_stmt_iterator bsi;
6234 /* An important special case -- if we are asked to express value of
6235 the original iv by itself, just exit; there is no need to
6236 introduce a new computation (that might also need casting the
6237 variable to unsigned and back). */
6238 if (cand->pos == IP_ORIGINAL
6239 && cand->incremented_at == use->stmt)
6241 enum tree_code stmt_code;
6243 gcc_assert (is_gimple_assign (use->stmt));
6244 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6246 /* Check whether we may leave the computation unchanged.
6247 This is the case only if it does not rely on other
6248 computations in the loop -- otherwise, the computation
6249 we rely upon may be removed in remove_unused_ivs,
6250 thus leading to an ICE. */
6251 stmt_code = gimple_assign_rhs_code (use->stmt);
6252 if (stmt_code == PLUS_EXPR
6253 || stmt_code == MINUS_EXPR
6254 || stmt_code == POINTER_PLUS_EXPR)
6256 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6257 op = gimple_assign_rhs2 (use->stmt);
6258 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6259 op = gimple_assign_rhs1 (use->stmt);
6260 else
6261 op = NULL_TREE;
6263 else
6264 op = NULL_TREE;
6266 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6267 return;
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
        }
    }
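
  /* A PHI node cannot hold the replacement expression directly: emit an
     assignment to its result after the labels of its block and remove
     the PHI.  A plain assignment just gets its RHS replaced.  */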
  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;


   Directly propagating t over to (1) will introduce an overlapping live
   range, thus increasing register pressure.  This peephole transforms it
   into:


   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;
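
  /* The update of VAR_AFTER and the conditional exit test must be the
     last two real statements of the block for the reordering to apply.  */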
  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;
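
  /* The statement immediately preceding the update must be the use
     itself: an assignment whose result is a plain SSA name.  */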
  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
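
  /* A non-null BOUND means that induction variable elimination succeeded:
     the exit test can compare the candidate directly against BOUND.  For
     instance, a test such as "if (i < n)" may become "if (i' != bound)",
     with bound computed in the loop preheader.  */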
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond cond_stmt = as_a <gimple_cond> (use->stmt);
      gimple_cond_set_lhs (cond_stmt, var);
      gimple_cond_set_code (cond_stmt, compare);
      gimple_cond_set_rhs (cond_stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;
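
              /* Pick the candidate that yields the simplest replacement
                 expression, preferring a matching step, a matching mode,
                 and a constant base, in that order of weight.  */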
              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
              for (i = 0; i < n_iv_uses (data) && i < 64; i++)
                {
                  cand = iv_use (data, i)->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                              ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                       == TYPE_MODE (TREE_TYPE (info->iv->base))
                       ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                       ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));
              if (!comp)
                continue;
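
              /* If the SSA name is used more than once or inside a larger
                 expression, bind the computation to a DEBUG_EXPR_DECL
                 temporary so it is emitted only once.  */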
              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
                  gimple_debug def_temp =
                    gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }
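
              /* Point every debug bind that uses DEF at the replacement
                 value.  */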
              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
}

/* Returns true if the loop body BODY, consisting of NUM_NODES blocks,
   contains a function call other than an inexpensive builtin.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }

  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }
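
  /* Precompute properties of the loop body used by the cost model, and
     renumber stmt uids so that statement order within a block can be
     compared cheaply.  */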
  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name, determine whether it behaves as an induction
     variable in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}