/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, and it might give better performance and enable us to decide
   costs more precisely, but getting all the interactions right would be
   complicated.  */

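/* An illustration (not part of the pass itself): in a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address &a[i] is an interesting address use, and the candidates
   include the original biv i as well as a pointer iv p with base &a[0]
   and step equal to the element size.  If the pointer candidate is
   selected, the loop is in effect strength-reduced to

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   and i can be eliminated entirely when it has no other uses.  */
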
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  unsigned inv_id;      /* Id of an invariant.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  unsigned cost;        /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  tree stmt;            /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  enum iv_position pos; /* Where it is computed.  */
  tree incremented_at;  /* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* Numbers of iterations for all exits of the current loop.  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

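/* The three bounds above should correspond to the --param options
   iv-consider-all-candidates-bound, iv-max-considered-uses and
   iv-always-prune-cand-set-bound (see params.def), so they can be tuned
   from the command line without recompiling.  */
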
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = loop->single_exit;

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Checks whether there exists a number X such that X * B = A, counting modulo
   2^BITS.  */

static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
        HOST_WIDE_INT *x)
{
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  unsigned HOST_WIDE_INT inv, ex, val;
  unsigned i;

  a &= mask;
  b &= mask;

  /* First divide the whole equation by 2 as long as possible.  */
  while (!(a & 1) && !(b & 1))
    {
      a >>= 1;
      b >>= 1;
      bits--;
      mask >>= 1;
    }

  if (!(b & 1))
    {
      /* If b is still even, a is odd and there is no such x.  */
      return false;
    }

  /* Find the inverse of b.  We compute it as
     b^(2^(bits - 1) - 1) (mod 2^bits).  */
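  /* A worked example of this identity: for bits == 4 and b == 3, the group
     of odd residues mod 16 has order 2^3 = 8, so b^(2^3) == 1 (mod 16) and
     b^(2^3 - 1) = 3^7 = 2187 == 11 (mod 16) is the inverse; indeed
     3 * 11 = 33 == 1 (mod 16).  The loop below accumulates
     inv = b^(2^0) * b^(2^1) * ... * b^(2^(bits - 2)) = b^(2^(bits - 1) - 1)
     by repeated squaring.  */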
  inv = 1;
  ex = b;
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;
      ex = (ex * ex) & mask;
    }

  val = (a * inv) & mask;

  gcc_assert (((val * b) & mask) == a);

  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  *x = val;

  return true;
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
        return false;
      if (bsi_stmt (bsi) == stmt)
        return true;
    }
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class class;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  class = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  switch (class)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Element of the table in which we cache the numbers of iterations obtained
   from exits of the loop.  */

struct nfe_cache_elt
{
  /* The edge for which the number of iterations is cached.  */
  edge exit;

  /* Number of iterations corresponding to this exit, or NULL if it cannot be
     determined.  */
  tree niter;
};

/* Hash function for nfe_cache_elt E.  */

static hashval_t
nfe_hash (const void *e)
{
  const struct nfe_cache_elt *elt = e;

  return htab_hash_pointer (elt->exit);
}

/* Equality function for nfe_cache_elt E1 and edge E2.  */

static int
nfe_eq (const void *e1, const void *e2)
{
  const struct nfe_cache_elt *elt1 = e1;

  return elt1->exit == e2;
}

/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct nfe_cache_elt *nfe_desc;
  struct tree_niter_desc desc;
  PTR *slot;

  slot = htab_find_slot_with_hash (data->niters, exit,
                                   htab_hash_pointer (exit),
                                   INSERT);

  if (!*slot)
    {
      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
      nfe_desc->exit = exit;

      /* Try to determine number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
                                     exit, &desc, true)
          && zero_p (desc.may_be_zero)
          && !contains_abnormal_ssa_name_p (desc.niter))
        nfe_desc->niter = desc.niter;
      else
        nfe_desc->niter = NULL_TREE;

      /* Store the result into the slot created by the INSERT lookup above,
         so that it is not recomputed (and the element not leaked) on the
         next query for this exit.  */
      *slot = nfe_desc;
    }
  else
    nfe_desc = *slot;

  return nfe_desc->niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = htab_create (10, nfe_hash, nfe_eq, free);
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
        return op0;

      if (!op0)
        return (code == PLUS_EXPR
                ? op1
                : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));

      return fold_build2 (code, ptr_type_node, op0, op1);

    case NOP_EXPR:
    case CONVERT_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

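/* For example (illustrative only): for EXPR of the form &a[0] + 4 * i,
   the PLUS_EXPR case above takes the base object determined from &a[0];
   the offset 4 * i has a non-pointer type and therefore contributes NULL,
   so the result is roughly (void *) &a.  */
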
/* Allocates an induction variable with given initial value BASE and step
   STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);

  if (step && integer_zerop (step))
    step = NULL_TREE;

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, NULL_TREE);
    }

  return name_info (data, var)->iv;
}

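/* Note that get_iv lazily creates a degenerate iv (base equal to the name
   itself, NULL step) for ssa names defined outside of the current loop, so
   callers can treat loop invariants and real induction variables
   uniformly.  */
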
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &iv, true))
    return NULL_TREE;

  return (zero_p (iv.step) ? NULL_TREE : iv.step);
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  lhs = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter, TDF_SLIM);
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            tree stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name so that it is not shown in the dumps, if it was not
     reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (zero_p (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
              || TREE_CODE (stmt) == MODIFY_EXPR);

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *op0_p;
  tree *op1_p;
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
  struct iv const_iv;
  tree zero = integer_zero_node;

  const_iv.step = NULL_TREE;

  if (TREE_CODE (*cond_p) != SSA_NAME
      && !COMPARISON_CLASS_P (*cond_p))
    return;

  if (TREE_CODE (*cond_p) == SSA_NAME)
    {
      op0_p = cond_p;
      op1_p = &zero;
    }
  else
    {
      op0_p = &TREE_OPERAND (*cond_p, 0);
      op1_p = &TREE_OPERAND (*cond_p, 1);
    }

  if (TREE_CODE (*op0_p) == SSA_NAME)
    iv0 = get_iv (data, *op0_p);
  else
    iv0 = &const_iv;

  if (TREE_CODE (*op1_p) == SSA_NAME)
    iv1 = get_iv (data, *op1_p);
  else
    iv1 = &const_iv;

  if (/* When comparing with a non-invariant value, we may not do any sensible
         induction variable elimination.  */
      (!iv0 || !iv1)
      /* Eliminating condition based on two ivs would be nontrivial.
         ??? TODO -- it is not really important to handle this case.  */
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
    {
      find_interesting_uses_op (data, *op0_p);
      find_interesting_uses_op (data, *op1_p);
      return;
    }

  if (zero_p (iv0->step) && zero_p (iv1->step))
    {
      /* If both are invariants, this is a work for unswitching.  */
      return;
    }

  civ = XNEW (struct iv);
  *civ = zero_p (iv0->step) ? *iv1: *iv0;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_CODE_LENGTH (TREE_CODE (expr));
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  tree stmt;
  tree *step_p;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (!iv->step)
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);

  if (!*dta->step_p)
    *dta->step_p = step;
  else
    *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *data)
{
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* Returns true if memory reference REF may be unaligned.  */

static bool
may_be_unaligned_p (tree ref)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode
      && (base_align < GET_MODE_ALIGNMENT (mode)
          || bitpos % GET_MODE_ALIGNMENT (mode) != 0
          || bitpos % BITS_PER_UNIT != 0))
    return true;

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = NULL;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF
      || (TREE_CODE (base) == COMPONENT_REF
          && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1))))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              if (step)
                step = fold_build2 (PLUS_EXPR, type, step, astep);
              else
                step = astep;
            }
        }

      if (zero_p (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step_p = &step;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || zero_p (step))
        goto fail;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == INDIRECT_REF)
            *ref = fold_indirect_ref (*ref);
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      lhs = TREE_OPERAND (stmt, 0);
      rhs = TREE_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, lhs);

          if (iv && !zero_p (iv->step))
            return;
        }

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
        {
        case tcc_comparison:
          find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
          return;

        case tcc_reference:
          find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
          if (REFERENCE_CLASS_P (lhs))
            find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
          return;

        default: ;
        }

      if (REFERENCE_CLASS_P (lhs)
          && is_gimple_val (rhs))
        {
          find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
          find_interesting_uses_op (data, rhs);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !zero_p (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  tree phi, def;

  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
        find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || zero_p (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (zero_p (op1))
        expr = op0;
      else if (zero_p (op0))
        {
          if (code == PLUS_EXPR)
            expr = op1;
          else
            expr = fold_build1 (NEGATE_EXPR, type, op1);
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && zero_p (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}

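/* An illustrative example: applied to the address &a[i + 4] for an array of
   8-byte elements, strip_offset returns (roughly) &a[i] and stores 32 in
   *OFFSET, allowing a single candidate based on &a[i] to serve several uses
   that differ only in a constant offset.  */
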
/* Returns variant of TYPE that can be used as base for different uses.
   For integer types, we return unsigned variant of the type, which
   avoids problems with overflows.  For pointer types, we return void *.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return ptr_type_node;

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  The important field is set to IMPORTANT
   and the position to POS.  If USE is not NULL, the candidate is set as
   related to it.  If both BASE and STEP are NULL, we add a pseudocandidate
   for the replacement of the final value of the iv by a direct computation
   of the value.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
        {
          base = fold_convert (type, base);
          if (step)
            step = fold_convert (type, step);
        }
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at)
        continue;

      if (!cand->iv)
        {
          if (!base && !step)
            break;

          continue;
        }

      if (!base && !step)
        continue;

      if (!operand_equal_p (base, cand->iv->base, 0))
        continue;

      if (zero_p (cand->iv->step))
        {
          if (zero_p (step))
            break;
        }
      else
        {
          if (step && operand_equal_p (step, cand->iv->step, 0))
            break;
        }
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
        cand->iv = NULL;
      else
        cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
          && TREE_CODE (step) != INTEGER_CST)
        {
          fd_ivopts_data = data;
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}

/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting the latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* Adds a candidate BASE + STEP * i.  The important field is set to IMPORTANT
   and the position to POS.  If USE is not NULL, the candidate is set as
   related to it.  The candidate computation is scheduled on all available
   positions.  */

static void
add_candidate (struct ivopts_data *data,
               tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
}

/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
                                     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
                 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}

/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  tree phi, def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
                 build_int_cst (TREE_TYPE (iv->base), 0),
                 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (TREE_CODE (phi) == PHI_NODE)
    {
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
                              iv->base, iv->step, true, IP_ORIGINAL, NULL,
                              SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}

/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !zero_p (iv->step))
        add_old_iv_candidates (data, iv);
    }
}

/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  */
  base = strip_offset (iv->base, &offset);
  if (offset)
    add_candidate (data, base, iv->step, false, use);
}

/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}

/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}

/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}

2263 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2264 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2265 we allocate a simple list to every use. */
2267 static void
2268 alloc_use_cost_map (struct ivopts_data *data)
2270 unsigned i, size, s, j;
2272 for (i = 0; i < n_iv_uses (data); i++)
2274 struct iv_use *use = iv_use (data, i);
2275 bitmap_iterator bi;
2277 if (data->consider_all_candidates)
2278 size = n_iv_cands (data);
2279 else
2281 s = 0;
2282 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2284 s++;
2287 /* Round up to a power of two, so that taking the modulus is fast. */
2288 for (size = 1; size < s; size <<= 1)
2289 continue;
2292 use->n_map_members = size;
2293 use->cost_map = XCNEWVEC (struct cost_pair, size);
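/* A worked instance of the sizing above: a use with 5 related candidates
   gets a map of size 8, so that a slot can later be found with a cheap
   bitwise AND, since id % 8 == (id & 7) whenever the size is a power
   of two.  */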
2297 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2298 depends on the invariants DEPENDS_ON and that the value used in
2299 expressing it is VALUE. */
2301 static void
2302 set_use_iv_cost (struct ivopts_data *data,
2303 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2304 bitmap depends_on, tree value)
2306 unsigned i, s;
2308 if (cost == INFTY)
2310 BITMAP_FREE (depends_on);
2311 return;
2314 if (data->consider_all_candidates)
2316 use->cost_map[cand->id].cand = cand;
2317 use->cost_map[cand->id].cost = cost;
2318 use->cost_map[cand->id].depends_on = depends_on;
2319 use->cost_map[cand->id].value = value;
2320 return;
2323 /* n_map_members is a power of two, so this computes modulo. */
2324 s = cand->id & (use->n_map_members - 1);
2325 for (i = s; i < use->n_map_members; i++)
2326 if (!use->cost_map[i].cand)
2327 goto found;
2328 for (i = 0; i < s; i++)
2329 if (!use->cost_map[i].cand)
2330 goto found;
2332 gcc_unreachable ();
2334 found:
2335 use->cost_map[i].cand = cand;
2336 use->cost_map[i].cost = cost;
2337 use->cost_map[i].depends_on = depends_on;
2338 use->cost_map[i].value = value;
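/* The cost map is used as a small open-addressed hash table.  For
   illustration, with n_map_members == 8 and cand->id == 13, the code above
   starts at slot 13 & 7 == 5, probes slots 5, 6, 7, then wraps around to
   0 .. 4 until a free slot is found; get_use_iv_cost below walks the same
   probe sequence when looking the pair up.  */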
2341 /* Gets cost of (USE, CANDIDATE) pair. */
2343 static struct cost_pair *
2344 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2345 struct iv_cand *cand)
2347 unsigned i, s;
2348 struct cost_pair *ret;
2350 if (!cand)
2351 return NULL;
2353 if (data->consider_all_candidates)
2355 ret = use->cost_map + cand->id;
2356 if (!ret->cand)
2357 return NULL;
2359 return ret;
2362 /* n_map_members is a power of two, so this computes modulo. */
2363 s = cand->id & (use->n_map_members - 1);
2364 for (i = s; i < use->n_map_members; i++)
2365 if (use->cost_map[i].cand == cand)
2366 return use->cost_map + i;
2368 for (i = 0; i < s; i++)
2369 if (use->cost_map[i].cand == cand)
2370 return use->cost_map + i;
2372 return NULL;
2375 /* Returns an estimate of the cost of computing SEQ. */
2377 static unsigned
2378 seq_cost (rtx seq)
2380 unsigned cost = 0;
2381 rtx set;
2383 for (; seq; seq = NEXT_INSN (seq))
2385 set = single_set (seq);
2386 if (set)
2387 cost += rtx_cost (set, SET);
2388 else
2389 cost++;
2392 return cost;
2395 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2396 static rtx
2397 produce_memory_decl_rtl (tree obj, int *regno)
2399 rtx x;
2401 gcc_assert (obj);
2402 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2404 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2405 x = gen_rtx_SYMBOL_REF (Pmode, name);
2407 else
2408 x = gen_raw_REG (Pmode, (*regno)++);
2410 return gen_rtx_MEM (DECL_MODE (obj), x);
2413 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2414 walk_tree. DATA points to the next fake register number to be used. */
2416 static tree
2417 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2419 tree obj = NULL_TREE;
2420 rtx x = NULL_RTX;
2421 int *regno = data;
2423 switch (TREE_CODE (*expr_p))
2425 case ADDR_EXPR:
2426 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2427 handled_component_p (*expr_p);
2428 expr_p = &TREE_OPERAND (*expr_p, 0))
2429 continue;
2430 obj = *expr_p;
2431 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2432 x = produce_memory_decl_rtl (obj, regno);
2433 break;
2435 case SSA_NAME:
2436 *ws = 0;
2437 obj = SSA_NAME_VAR (*expr_p);
2438 if (!DECL_RTL_SET_P (obj))
2439 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2440 break;
2442 case VAR_DECL:
2443 case PARM_DECL:
2444 case RESULT_DECL:
2445 *ws = 0;
2446 obj = *expr_p;
2448 if (DECL_RTL_SET_P (obj))
2449 break;
2451 if (DECL_MODE (obj) == BLKmode)
2452 x = produce_memory_decl_rtl (obj, regno);
2453 else
2454 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2456 break;
2458 default:
2459 break;
2462 if (x)
2464 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2465 SET_DECL_RTL (obj, x);
2468 return NULL_TREE;
2471 /* Determines cost of the computation of EXPR. */
2473 static unsigned
2474 computation_cost (tree expr)
2476 rtx seq, rslt;
2477 tree type = TREE_TYPE (expr);
2478 unsigned cost;
2479 /* Avoid using hard regs in ways which may be unsupported. */
2480 int regno = LAST_VIRTUAL_REGISTER + 1;
2482 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2483 start_sequence ();
2484 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2485 seq = get_insns ();
2486 end_sequence ();
2488 cost = seq_cost (seq);
2489 if (MEM_P (rslt))
2490 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2492 return cost;
2495 /* Returns variable containing the value of candidate CAND at statement AT. */
2497 static tree
2498 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2500 if (stmt_after_increment (loop, cand, stmt))
2501 return cand->var_after;
2502 else
2503 return cand->var_before;
2506 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2507 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2509 int
2510 tree_int_cst_sign_bit (tree t)
2512 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2513 unsigned HOST_WIDE_INT w;
2515 if (bitno < HOST_BITS_PER_WIDE_INT)
2516 w = TREE_INT_CST_LOW (t);
2517 else
2519 w = TREE_INT_CST_HIGH (t);
2520 bitno -= HOST_BITS_PER_WIDE_INT;
2523 return (w >> bitno) & 1;
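/* Worked example: for a constant of an 8-bit type, bitno == 7; if the low
   HOST_WIDE_INT word of T is 0x80, then (0x80 >> 7) & 1 == 1, i.e. the
   constant has its sign bit set.  */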
2526 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2527 return cst. Otherwise return NULL_TREE. */
2529 static tree
2530 constant_multiple_of (tree type, tree top, tree bot)
2532 tree res, mby, p0, p1;
2533 enum tree_code code;
2534 bool negate;
2536 STRIP_NOPS (top);
2537 STRIP_NOPS (bot);
2539 if (operand_equal_p (top, bot, 0))
2540 return build_int_cst (type, 1);
2542 code = TREE_CODE (top);
2543 switch (code)
2545 case MULT_EXPR:
2546 mby = TREE_OPERAND (top, 1);
2547 if (TREE_CODE (mby) != INTEGER_CST)
2548 return NULL_TREE;
2550 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2551 if (!res)
2552 return NULL_TREE;
2554 return fold_binary_to_constant (MULT_EXPR, type, res,
2555 fold_convert (type, mby));
2557 case PLUS_EXPR:
2558 case MINUS_EXPR:
2559 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2560 if (!p0)
2561 return NULL_TREE;
2562 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2563 if (!p1)
2564 return NULL_TREE;
2566 return fold_binary_to_constant (code, type, p0, p1);
2568 case INTEGER_CST:
2569 if (TREE_CODE (bot) != INTEGER_CST)
2570 return NULL_TREE;
2572 bot = fold_convert (type, bot);
2573 top = fold_convert (type, top);
2575 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2576 the result afterwards. */
2577 if (tree_int_cst_sign_bit (bot))
2579 negate = true;
2580 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2582 else
2583 negate = false;
2585 /* Ditto for TOP. */
2586 if (tree_int_cst_sign_bit (top))
2588 negate = !negate;
2589 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2592 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2593 return NULL_TREE;
2595 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2596 if (negate)
2597 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2598 return res;
2600 default:
2601 return NULL_TREE;
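/* Worked examples of the recursion above: with TOP == i * 12 and BOT == i,
   the MULT_EXPR case yields 12.  With TOP == -12 and BOT == 4, both values
   are negated before the division, giving 3, and the result is negated back
   to -3.  With TOP == 13 and BOT == 4 the TRUNC_MOD_EXPR check is nonzero
   and NULL_TREE is returned.  */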
2605 /* Sets COMB to CST. */
2607 static void
2608 aff_combination_const (struct affine_tree_combination *comb, tree type,
2609 unsigned HOST_WIDE_INT cst)
2611 unsigned prec = TYPE_PRECISION (type);
2613 comb->type = type;
2614 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2616 comb->n = 0;
2617 comb->rest = NULL_TREE;
2618 comb->offset = cst & comb->mask;
2621 /* Sets COMB to single element ELT. */
2623 static void
2624 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2626 unsigned prec = TYPE_PRECISION (type);
2628 comb->type = type;
2629 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2631 comb->n = 1;
2632 comb->elts[0] = elt;
2633 comb->coefs[0] = 1;
2634 comb->rest = NULL_TREE;
2635 comb->offset = 0;
2638 /* Scales COMB by SCALE. */
2640 static void
2641 aff_combination_scale (struct affine_tree_combination *comb,
2642 unsigned HOST_WIDE_INT scale)
2644 unsigned i, j;
2646 if (scale == 1)
2647 return;
2649 if (scale == 0)
2651 aff_combination_const (comb, comb->type, 0);
2652 return;
2655 comb->offset = (scale * comb->offset) & comb->mask;
2656 for (i = 0, j = 0; i < comb->n; i++)
2658 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2659 comb->elts[j] = comb->elts[i];
2660 if (comb->coefs[j] != 0)
2661 j++;
2663 comb->n = j;
2665 if (comb->rest)
2667 if (comb->n < MAX_AFF_ELTS)
2669 comb->coefs[comb->n] = scale;
2670 comb->elts[comb->n] = comb->rest;
2671 comb->rest = NULL_TREE;
2672 comb->n++;
2674 else
2675 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2676 build_int_cst_type (comb->type, scale));
2680 /* Adds ELT * SCALE to COMB. */
2682 static void
2683 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2684 unsigned HOST_WIDE_INT scale)
2686 unsigned i;
2688 if (scale == 0)
2689 return;
2691 for (i = 0; i < comb->n; i++)
2692 if (operand_equal_p (comb->elts[i], elt, 0))
2694 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2695 if (comb->coefs[i])
2696 return;
2698 comb->n--;
2699 comb->coefs[i] = comb->coefs[comb->n];
2700 comb->elts[i] = comb->elts[comb->n];
2702 if (comb->rest)
2704 gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2705 comb->coefs[comb->n] = 1;
2706 comb->elts[comb->n] = comb->rest;
2707 comb->rest = NULL_TREE;
2708 comb->n++;
2710 return;
2712 if (comb->n < MAX_AFF_ELTS)
2714 comb->coefs[comb->n] = scale;
2715 comb->elts[comb->n] = elt;
2716 comb->n++;
2717 return;
2720 if (scale == 1)
2721 elt = fold_convert (comb->type, elt);
2722 else
2723 elt = fold_build2 (MULT_EXPR, comb->type,
2724 fold_convert (comb->type, elt),
2725 build_int_cst_type (comb->type, scale));
2727 if (comb->rest)
2728 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2729 else
2730 comb->rest = elt;
2733 /* Adds COMB2 to COMB1. */
2735 static void
2736 aff_combination_add (struct affine_tree_combination *comb1,
2737 struct affine_tree_combination *comb2)
2739 unsigned i;
2741 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2742 for (i = 0; i < comb2->n; i++)
2743 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2744 if (comb2->rest)
2745 aff_combination_add_elt (comb1, comb2->rest, 1);
2748 /* Splits EXPR into an affine combination of parts. */
2750 static void
2751 tree_to_aff_combination (tree expr, tree type,
2752 struct affine_tree_combination *comb)
2754 struct affine_tree_combination tmp;
2755 enum tree_code code;
2756 tree cst, core, toffset;
2757 HOST_WIDE_INT bitpos, bitsize;
2758 enum machine_mode mode;
2759 int unsignedp, volatilep;
2761 STRIP_NOPS (expr);
2763 code = TREE_CODE (expr);
2764 switch (code)
2766 case INTEGER_CST:
2767 aff_combination_const (comb, type, int_cst_value (expr));
2768 return;
2770 case PLUS_EXPR:
2771 case MINUS_EXPR:
2772 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2773 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2774 if (code == MINUS_EXPR)
2775 aff_combination_scale (&tmp, -1);
2776 aff_combination_add (comb, &tmp);
2777 return;
2779 case MULT_EXPR:
2780 cst = TREE_OPERAND (expr, 1);
2781 if (TREE_CODE (cst) != INTEGER_CST)
2782 break;
2783 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2784 aff_combination_scale (comb, int_cst_value (cst));
2785 return;
2787 case NEGATE_EXPR:
2788 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2789 aff_combination_scale (comb, -1);
2790 return;
2792 case ADDR_EXPR:
2793 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2794 &toffset, &mode, &unsignedp, &volatilep,
2795 false);
2796 if (bitpos % BITS_PER_UNIT != 0)
2797 break;
2798 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2799 core = build_fold_addr_expr (core);
2800 if (TREE_CODE (core) == ADDR_EXPR)
2801 aff_combination_add_elt (comb, core, 1);
2802 else
2804 tree_to_aff_combination (core, type, &tmp);
2805 aff_combination_add (comb, &tmp);
2807 if (toffset)
2809 tree_to_aff_combination (toffset, type, &tmp);
2810 aff_combination_add (comb, &tmp);
2812 return;
2814 default:
2815 break;
2818 aff_combination_elt (comb, type, expr);
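/* For illustration, a hypothetical expression a + 4 * b - 7 decomposes into

     offset = -7 (reduced modulo the mask of the type),
     elts/coefs = { a: 1, b: 4 },  rest = NULL_TREE;

   elements beyond MAX_AFF_ELTS would be accumulated into REST instead.  */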
2821 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for the width of TYPE. */
2823 static tree
2824 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2825 unsigned HOST_WIDE_INT mask)
2827 enum tree_code code;
2829 scale &= mask;
2830 elt = fold_convert (type, elt);
2832 if (scale == 1)
2834 if (!expr)
2835 return elt;
2837 return fold_build2 (PLUS_EXPR, type, expr, elt);
2840 if (scale == mask)
2842 if (!expr)
2843 return fold_build1 (NEGATE_EXPR, type, elt);
2845 return fold_build2 (MINUS_EXPR, type, expr, elt);
2848 if (!expr)
2849 return fold_build2 (MULT_EXPR, type, elt,
2850 build_int_cst_type (type, scale));
2852 if ((scale | (mask >> 1)) == mask)
2854 /* Scale is negative. */
2855 code = MINUS_EXPR;
2856 scale = (-scale) & mask;
2858 else
2859 code = PLUS_EXPR;
2861 elt = fold_build2 (MULT_EXPR, type, elt,
2862 build_int_cst_type (type, scale));
2863 return fold_build2 (code, type, expr, elt);
2866 /* Copies the tree elements of COMB to ensure that they are not shared. */
2868 static void
2869 unshare_aff_combination (struct affine_tree_combination *comb)
2871 unsigned i;
2873 for (i = 0; i < comb->n; i++)
2874 comb->elts[i] = unshare_expr (comb->elts[i]);
2875 if (comb->rest)
2876 comb->rest = unshare_expr (comb->rest);
2879 /* Makes tree from the affine combination COMB. */
2881 static tree
2882 aff_combination_to_tree (struct affine_tree_combination *comb)
2884 tree type = comb->type;
2885 tree expr = comb->rest;
2886 unsigned i;
2887 unsigned HOST_WIDE_INT off, sgn;
2889 /* Handle the special case produced by get_computation_aff when
2890 the type does not fit in HOST_WIDE_INT. */
2891 if (comb->n == 0 && comb->offset == 0)
2892 return fold_convert (type, expr);
2894 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2896 for (i = 0; i < comb->n; i++)
2897 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2898 comb->mask);
2900 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2902 /* Offset is negative. */
2903 off = (-comb->offset) & comb->mask;
2904 sgn = comb->mask;
2906 else
2908 off = comb->offset;
2909 sgn = 1;
2911 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2912 comb->mask);
2915 /* Determines the expression by which USE is expressed from induction variable
2916 CAND at statement AT in LOOP. The expression is stored in decomposed
2917 form in AFF. Returns false if USE cannot be expressed using CAND. */
2919 static bool
2920 get_computation_aff (struct loop *loop,
2921 struct iv_use *use, struct iv_cand *cand, tree at,
2922 struct affine_tree_combination *aff)
2924 tree ubase = use->iv->base;
2925 tree ustep = use->iv->step;
2926 tree cbase = cand->iv->base;
2927 tree cstep = cand->iv->step;
2928 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2929 tree uutype;
2930 tree expr, delta;
2931 tree ratio;
2932 unsigned HOST_WIDE_INT ustepi, cstepi;
2933 HOST_WIDE_INT ratioi;
2934 struct affine_tree_combination cbase_aff, expr_aff;
2935 tree cstep_orig = cstep, ustep_orig = ustep;
2937 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2939 /* We do not have enough precision to express the values of the use. */
2940 return false;
2943 expr = var_at_stmt (loop, cand, at);
2945 if (TREE_TYPE (expr) != ctype)
2947 /* This may happen with the original ivs. */
2948 expr = fold_convert (ctype, expr);
2951 if (TYPE_UNSIGNED (utype))
2952 uutype = utype;
2953 else
2955 uutype = unsigned_type_for (utype);
2956 ubase = fold_convert (uutype, ubase);
2957 ustep = fold_convert (uutype, ustep);
2960 if (uutype != ctype)
2962 expr = fold_convert (uutype, expr);
2963 cbase = fold_convert (uutype, cbase);
2964 cstep = fold_convert (uutype, cstep);
2966 /* If the conversion is not a noop, we must take it into account when
2967 considering the value of the step. */
2968 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2969 cstep_orig = cstep;
2972 if (cst_and_fits_in_hwi (cstep_orig)
2973 && cst_and_fits_in_hwi (ustep_orig))
2975 ustepi = int_cst_value (ustep_orig);
2976 cstepi = int_cst_value (cstep_orig);
2978 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2980 /* TODO maybe consider case when ustep divides cstep and the ratio is
2981 a power of 2 (so that the division is fast to execute)? We would
2982 need to be much more careful with overflows etc. then. */
2983 return false;
2986 ratio = build_int_cst_type (uutype, ratioi);
2988 else
2990 ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
2991 if (!ratio)
2992 return false;
2994 /* Ratioi is only used to detect special cases when the multiplicative
2995 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
2996 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
2997 to integer_onep/integer_all_onesp, since the former ignores
2998 TREE_OVERFLOW. */
2999 if (cst_and_fits_in_hwi (ratio))
3000 ratioi = int_cst_value (ratio);
3001 else if (integer_onep (ratio))
3002 ratioi = 1;
3003 else if (integer_all_onesp (ratio))
3004 ratioi = -1;
3005 else
3006 ratioi = 0;
3009 /* We may need to shift the value if we are after the increment. */
3010 if (stmt_after_increment (loop, cand, at))
3011 cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3013 /* use = ubase - ratio * cbase + ratio * var.
3015 In the general case ubase + ratio * (var - cbase) could be better (one less
3016 multiplication), but it is often possible to eliminate redundant parts
3017 of the computation from the (ubase - ratio * cbase) term, and if that does
3018 not happen, fold is able to apply the distributive law to obtain this form
3019 anyway. */
3021 if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3023 /* Let's compute in trees and just return the result in AFF. This case
3024 should not be very common, and fold itself is not that bad either,
3025 so making the aff. functions more complicated to handle this case
3026 is not that urgent. */
3027 if (ratioi == 1)
3029 delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3030 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3032 else if (ratioi == -1)
3034 delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3035 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3037 else
3039 delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3040 delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3041 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3042 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3045 aff->type = uutype;
3046 aff->n = 0;
3047 aff->offset = 0;
3048 aff->mask = 0;
3049 aff->rest = expr;
3050 return true;
3053 /* If we got here, the type fits in HOST_WIDE_INT, so it must be
3054 possible to compute ratioi. */
3055 gcc_assert (ratioi);
3057 tree_to_aff_combination (ubase, uutype, aff);
3058 tree_to_aff_combination (cbase, uutype, &cbase_aff);
3059 tree_to_aff_combination (expr, uutype, &expr_aff);
3060 aff_combination_scale (&cbase_aff, -ratioi);
3061 aff_combination_scale (&expr_aff, ratioi);
3062 aff_combination_add (aff, &cbase_aff);
3063 aff_combination_add (aff, &expr_aff);
3065 return true;
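/* A hypothetical instance of the rewrite above: if the use has
   {base = b, step = 4} and the candidate has {base = 0, step = 1}, then
   ratio == 4 and the use is expressed as

     use = b - 4 * 0 + 4 * var  ==  b + 4 * var,

   computed in the affine representation so that redundant parts can be
   eliminated.  */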
3068 /* Determines the expression by which USE is expressed from induction variable
3069 CAND at statement AT in LOOP. The computation is unshared. */
3071 static tree
3072 get_computation_at (struct loop *loop,
3073 struct iv_use *use, struct iv_cand *cand, tree at)
3075 struct affine_tree_combination aff;
3076 tree type = TREE_TYPE (use->iv->base);
3078 if (!get_computation_aff (loop, use, cand, at, &aff))
3079 return NULL_TREE;
3080 unshare_aff_combination (&aff);
3081 return fold_convert (type, aff_combination_to_tree (&aff));
3084 /* Determines the expression by which USE is expressed from induction variable
3085 CAND in LOOP. The computation is unshared. */
3087 static tree
3088 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3090 return get_computation_at (loop, use, cand, use->stmt);
3093 /* Returns cost of addition in MODE. */
3095 static unsigned
3096 add_cost (enum machine_mode mode)
3098 static unsigned costs[NUM_MACHINE_MODES];
3099 rtx seq;
3100 unsigned cost;
3102 if (costs[mode])
3103 return costs[mode];
3105 start_sequence ();
3106 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3107 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3108 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3109 NULL_RTX);
3110 seq = get_insns ();
3111 end_sequence ();
3113 cost = seq_cost (seq);
3114 if (!cost)
3115 cost = 1;
3117 costs[mode] = cost;
3119 if (dump_file && (dump_flags & TDF_DETAILS))
3120 fprintf (dump_file, "Addition in %s costs %d\n",
3121 GET_MODE_NAME (mode), cost);
3122 return cost;
3125 /* Entry in a hashtable of already known costs for multiplication. */
3126 struct mbc_entry
3128 HOST_WIDE_INT cst; /* The constant to multiply by. */
3129 enum machine_mode mode; /* In mode. */
3130 unsigned cost; /* The cost. */
3133 /* Computes the hash value for ENTRY. */
3135 static hashval_t
3136 mbc_entry_hash (const void *entry)
3138 const struct mbc_entry *e = entry;
3140 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3143 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3145 static int
3146 mbc_entry_eq (const void *entry1, const void *entry2)
3148 const struct mbc_entry *e1 = entry1;
3149 const struct mbc_entry *e2 = entry2;
3151 return (e1->mode == e2->mode
3152 && e1->cst == e2->cst);
3155 /* Returns cost of multiplication by constant CST in MODE. */
3157 unsigned
3158 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3160 static htab_t costs;
3161 struct mbc_entry **cached, act;
3162 rtx seq;
3163 unsigned cost;
3165 if (!costs)
3166 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3168 act.mode = mode;
3169 act.cst = cst;
3170 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3171 if (*cached)
3172 return (*cached)->cost;
3174 *cached = XNEW (struct mbc_entry);
3175 (*cached)->mode = mode;
3176 (*cached)->cst = cst;
3178 start_sequence ();
3179 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3180 gen_int_mode (cst, mode), NULL_RTX, 0);
3181 seq = get_insns ();
3182 end_sequence ();
3184 cost = seq_cost (seq);
3186 if (dump_file && (dump_flags & TDF_DETAILS))
3187 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3188 (int) cst, GET_MODE_NAME (mode), cost);
3190 (*cached)->cost = cost;
3192 return cost;
3195 /* Returns true if multiplying by RATIO is allowed in an address. */
3197 bool
3198 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3200 #define MAX_RATIO 128
3201 static sbitmap valid_mult;
3203 if (!valid_mult)
3205 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3206 rtx addr;
3207 HOST_WIDE_INT i;
3209 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3210 sbitmap_zero (valid_mult);
3211 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3212 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3214 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3215 if (memory_address_p (Pmode, addr))
3216 SET_BIT (valid_mult, i + MAX_RATIO);
3219 if (dump_file && (dump_flags & TDF_DETAILS))
3221 fprintf (dump_file, " allowed multipliers:");
3222 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3223 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3224 fprintf (dump_file, " %d", (int) i);
3225 fprintf (dump_file, "\n");
3226 fprintf (dump_file, "\n");
3230 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3231 return false;
3233 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
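/* For instance, on a target with x86-like addressing modes the probing
   above would typically leave only the multipliers 1, 2, 4 and 8 set in
   VALID_MULT (the concrete set is target-dependent and merely illustrative
   here); each ratio is thus tested against the target only once.  */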
3236 /* Returns the cost of an address in the shape symbol + var + OFFSET + RATIO * index.
3237 If SYMBOL_PRESENT is false, the symbol part is omitted. If VAR_PRESENT is
3238 false, the variable part is omitted.
3240 TODO -- there must be some better way. This all is quite crude. */
3242 static unsigned
3243 get_address_cost (bool symbol_present, bool var_present,
3244 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3246 static bool initialized = false;
3247 static HOST_WIDE_INT rat, off;
3248 static HOST_WIDE_INT min_offset, max_offset;
3249 static unsigned costs[2][2][2][2];
3250 unsigned cost, acost;
3251 rtx seq, addr, base;
3252 bool offset_p, ratio_p;
3253 rtx reg1;
3254 HOST_WIDE_INT s_offset;
3255 unsigned HOST_WIDE_INT mask;
3256 unsigned bits;
3258 if (!initialized)
3260 HOST_WIDE_INT i;
3261 initialized = true;
3263 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3265 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3266 for (i = 1; i <= 1 << 20; i <<= 1)
3268 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3269 if (!memory_address_p (Pmode, addr))
3270 break;
3272 max_offset = i >> 1;
3273 off = max_offset;
3275 for (i = 1; i <= 1 << 20; i <<= 1)
3277 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3278 if (!memory_address_p (Pmode, addr))
3279 break;
3281 min_offset = -(i >> 1);
3283 if (dump_file && (dump_flags & TDF_DETAILS))
3285 fprintf (dump_file, "get_address_cost:\n");
3286 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3287 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3290 rat = 1;
3291 for (i = 2; i <= MAX_RATIO; i++)
3292 if (multiplier_allowed_in_address_p (i))
3294 rat = i;
3295 break;
3299 bits = GET_MODE_BITSIZE (Pmode);
3300 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3301 offset &= mask;
3302 if ((offset >> (bits - 1) & 1))
3303 offset |= ~mask;
3304 s_offset = offset;
3306 cost = 0;
3307 offset_p = (s_offset != 0
3308 && min_offset <= s_offset && s_offset <= max_offset);
3309 ratio_p = (ratio != 1
3310 && multiplier_allowed_in_address_p (ratio));
3312 if (ratio != 1 && !ratio_p)
3313 cost += multiply_by_cost (ratio, Pmode);
3315 if (s_offset && !offset_p && !symbol_present)
3317 cost += add_cost (Pmode);
3318 var_present = true;
3321 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3322 if (!acost)
3324 int old_cse_not_expected;
3325 acost = 0;
3327 addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3328 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3329 if (ratio_p)
3330 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3332 if (var_present)
3333 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3335 if (symbol_present)
3337 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3338 if (offset_p)
3339 base = gen_rtx_fmt_e (CONST, Pmode,
3340 gen_rtx_fmt_ee (PLUS, Pmode,
3341 base,
3342 gen_int_mode (off, Pmode)));
3344 else if (offset_p)
3345 base = gen_int_mode (off, Pmode);
3346 else
3347 base = NULL_RTX;
3349 if (base)
3350 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3352 start_sequence ();
3353 /* To avoid splitting addressing modes, pretend that no cse will
3354 follow. */
3355 old_cse_not_expected = cse_not_expected;
3356 cse_not_expected = true;
3357 addr = memory_address (Pmode, addr);
3358 cse_not_expected = old_cse_not_expected;
3359 seq = get_insns ();
3360 end_sequence ();
3362 acost = seq_cost (seq);
3363 acost += address_cost (addr, Pmode);
3365 if (!acost)
3366 acost = 1;
3367 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3370 return cost + acost;
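/* A worked example of the offset canonicalization above, assuming a 32-bit
   Pmode: offset == 0xfffffffc is masked and then sign-extended by hand, so
   s_offset == -4 and small negative offsets compare correctly against
   [min_offset, max_offset].  */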
3373 /* Estimates cost of forcing expression EXPR into a variable. */
3375 unsigned
3376 force_expr_to_var_cost (tree expr)
3378 static bool costs_initialized = false;
3379 static unsigned integer_cost;
3380 static unsigned symbol_cost;
3381 static unsigned address_cost;
3382 tree op0, op1;
3383 unsigned cost0, cost1, cost;
3384 enum machine_mode mode;
3386 if (!costs_initialized)
3388 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3389 rtx x = gen_rtx_MEM (DECL_MODE (var),
3390 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3391 tree addr;
3392 tree type = build_pointer_type (integer_type_node);
3394 integer_cost = computation_cost (build_int_cst (integer_type_node,
3395 2000));
3397 SET_DECL_RTL (var, x);
3398 TREE_STATIC (var) = 1;
3399 addr = build1 (ADDR_EXPR, type, var);
3400 symbol_cost = computation_cost (addr) + 1;
3402 address_cost
3403 = computation_cost (build2 (PLUS_EXPR, type,
3404 addr,
3405 build_int_cst (type, 2000))) + 1;
3406 if (dump_file && (dump_flags & TDF_DETAILS))
3408 fprintf (dump_file, "force_expr_to_var_cost:\n");
3409 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3410 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3411 fprintf (dump_file, " address %d\n", (int) address_cost);
3412 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3413 fprintf (dump_file, "\n");
3416 costs_initialized = true;
3419 STRIP_NOPS (expr);
3421 if (SSA_VAR_P (expr))
3422 return 0;
3424 if (TREE_INVARIANT (expr))
3426 if (TREE_CODE (expr) == INTEGER_CST)
3427 return integer_cost;
3429 if (TREE_CODE (expr) == ADDR_EXPR)
3431 tree obj = TREE_OPERAND (expr, 0);
3433 if (TREE_CODE (obj) == VAR_DECL
3434 || TREE_CODE (obj) == PARM_DECL
3435 || TREE_CODE (obj) == RESULT_DECL)
3436 return symbol_cost;
3439 return address_cost;
3442 switch (TREE_CODE (expr))
3444 case PLUS_EXPR:
3445 case MINUS_EXPR:
3446 case MULT_EXPR:
3447 op0 = TREE_OPERAND (expr, 0);
3448 op1 = TREE_OPERAND (expr, 1);
3449 STRIP_NOPS (op0);
3450 STRIP_NOPS (op1);
3452 if (is_gimple_val (op0))
3453 cost0 = 0;
3454 else
3455 cost0 = force_expr_to_var_cost (op0);
3457 if (is_gimple_val (op1))
3458 cost1 = 0;
3459 else
3460 cost1 = force_expr_to_var_cost (op1);
3462 break;
3464 default:
3465 /* Just an arbitrary value, FIXME. */
3466 return target_spill_cost;
3469 mode = TYPE_MODE (TREE_TYPE (expr));
3470 switch (TREE_CODE (expr))
3472 case PLUS_EXPR:
3473 case MINUS_EXPR:
3474 cost = add_cost (mode);
3475 break;
3477 case MULT_EXPR:
3478 if (cst_and_fits_in_hwi (op0))
3479 cost = multiply_by_cost (int_cst_value (op0), mode);
3480 else if (cst_and_fits_in_hwi (op1))
3481 cost = multiply_by_cost (int_cst_value (op1), mode);
3482 else
3483 return target_spill_cost;
3484 break;
3486 default:
3487 gcc_unreachable ();
3490 cost += cost0;
3491 cost += cost1;
3493 /* Bound the cost by target_spill_cost. The parts of complicated
3494 computations are often either loop invariant or at least can
3495 be shared between several iv uses, so letting this grow without
3496 limit would not give reasonable results. */
3497 return cost < target_spill_cost ? cost : target_spill_cost;
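/* A worked instance of the recursion above: for (a + b) * 5, where a and b
   are gimple values, the cost is multiply_by_cost (5, mode) for the outer
   MULT_EXPR plus add_cost (mode) for the inner sum, capped at
   target_spill_cost.  */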
3500 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3501 invariants the computation depends on. */
3503 static unsigned
3504 force_var_cost (struct ivopts_data *data,
3505 tree expr, bitmap *depends_on)
3507 if (depends_on)
3509 fd_ivopts_data = data;
3510 walk_tree (&expr, find_depends, depends_on, NULL);
3513 return force_expr_to_var_cost (expr);
3516 /* Estimates the cost of expressing the address ADDR as var + symbol + offset.
3517 The value of the offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT
3518 are set to false if the corresponding part is missing. DEPENDS_ON is a set
3519 of the invariants the computation depends on. */
3521 static unsigned
3522 split_address_cost (struct ivopts_data *data,
3523 tree addr, bool *symbol_present, bool *var_present,
3524 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3526 tree core;
3527 HOST_WIDE_INT bitsize;
3528 HOST_WIDE_INT bitpos;
3529 tree toffset;
3530 enum machine_mode mode;
3531 int unsignedp, volatilep;
3533 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3534 &unsignedp, &volatilep, false);
3536 if (toffset != 0
3537 || bitpos % BITS_PER_UNIT != 0
3538 || TREE_CODE (core) != VAR_DECL)
3540 *symbol_present = false;
3541 *var_present = true;
3542 fd_ivopts_data = data;
3543 walk_tree (&addr, find_depends, depends_on, NULL);
3544 return target_spill_cost;
3547 *offset += bitpos / BITS_PER_UNIT;
3548 if (TREE_STATIC (core)
3549 || DECL_EXTERNAL (core))
3551 *symbol_present = true;
3552 *var_present = false;
3553 return 0;
3556 *symbol_present = false;
3557 *var_present = true;
3558 return 0;
3561 /* Estimates the cost of expressing the difference of addresses E1 - E2 as
3562 var + symbol + offset. The value of the offset is added to OFFSET;
3563 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3564 part is missing. DEPENDS_ON is a set of the invariants the computation
3565 depends on. */
3567 static unsigned
3568 ptr_difference_cost (struct ivopts_data *data,
3569 tree e1, tree e2, bool *symbol_present, bool *var_present,
3570 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3572 HOST_WIDE_INT diff = 0;
3573 unsigned cost;
3575 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3577 if (ptr_difference_const (e1, e2, &diff))
3579 *offset += diff;
3580 *symbol_present = false;
3581 *var_present = false;
3582 return 0;
3585 if (e2 == integer_zero_node)
3586 return split_address_cost (data, TREE_OPERAND (e1, 0),
3587 symbol_present, var_present, offset, depends_on);
3589 *symbol_present = false;
3590 *var_present = true;
3592 cost = force_var_cost (data, e1, depends_on);
3593 cost += force_var_cost (data, e2, depends_on);
3594 cost += add_cost (Pmode);
3596 return cost;
3599 /* Estimates the cost of expressing the difference E1 - E2 as
3600 var + symbol + offset. The value of the offset is added to OFFSET;
3601 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3602 part is missing. DEPENDS_ON is a set of the invariants the computation
3603 depends on. */
3605 static unsigned
3606 difference_cost (struct ivopts_data *data,
3607 tree e1, tree e2, bool *symbol_present, bool *var_present,
3608 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3610 unsigned cost;
3611 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3612 unsigned HOST_WIDE_INT off1, off2;
3614 e1 = strip_offset (e1, &off1);
3615 e2 = strip_offset (e2, &off2);
3616 *offset += off1 - off2;
3618 STRIP_NOPS (e1);
3619 STRIP_NOPS (e2);
3621 if (TREE_CODE (e1) == ADDR_EXPR)
3622 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3623 depends_on);
3624 *symbol_present = false;
3626 if (operand_equal_p (e1, e2, 0))
3628 *var_present = false;
3629 return 0;
3631 *var_present = true;
3632 if (zero_p (e2))
3633 return force_var_cost (data, e1, depends_on);
3635 if (zero_p (e1))
3637 cost = force_var_cost (data, e2, depends_on);
3638 cost += multiply_by_cost (-1, mode);
3640 return cost;
3643 cost = force_var_cost (data, e1, depends_on);
3644 cost += force_var_cost (data, e2, depends_on);
3645 cost += add_cost (mode);
3647 return cost;
3650 /* Determines the cost of the computation by which USE is expressed
3651 from induction variable CAND. If ADDRESS_P is true, we just need
3652 to create an address from it, otherwise we want to get it into a
3653 register. A set of invariants we depend on is stored in
3654 DEPENDS_ON. AT is the statement at which the value is computed. */
3656 static unsigned
3657 get_computation_cost_at (struct ivopts_data *data,
3658 struct iv_use *use, struct iv_cand *cand,
3659 bool address_p, bitmap *depends_on, tree at)
3661 tree ubase = use->iv->base, ustep = use->iv->step;
3662 tree cbase, cstep;
3663 tree utype = TREE_TYPE (ubase), ctype;
3664 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3665 HOST_WIDE_INT ratio, aratio;
3666 bool var_present, symbol_present;
3667 unsigned cost = 0, n_sums;
3669 *depends_on = NULL;
3671 /* Only consider real candidates. */
3672 if (!cand->iv)
3673 return INFTY;
3675 cbase = cand->iv->base;
3676 cstep = cand->iv->step;
3677 ctype = TREE_TYPE (cbase);
3679 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3681 /* We do not have enough precision to express the values of the use. */
3682 return INFTY;
3685 if (address_p)
3687 /* Do not try to express the address of an object with a computation
3688 based on the address of a different object. This may cause problems
3689 in RTL-level alias analysis (which does not expect this to happen,
3690 as it is illegal in C), and would be unlikely to be useful
3691 anyway. */
3692 if (use->iv->base_object
3693 && cand->iv->base_object
3694 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3695 return INFTY;
3698 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3700 /* TODO -- add direct handling of this case. */
3701 goto fallback;
3704 /* CSTEPI is removed from the offset in case the statement is after the
3705 increment. If the step is not constant, we use zero instead.
3706 This is a bit imprecise (there is the extra addition), but
3707 redundancy elimination is likely to transform the code so that
3708 it uses the value of the variable before the increment anyway,
3709 so it is not that unrealistic. */
3710 if (cst_and_fits_in_hwi (cstep))
3711 cstepi = int_cst_value (cstep);
3712 else
3713 cstepi = 0;
3715 if (cst_and_fits_in_hwi (ustep)
3716 && cst_and_fits_in_hwi (cstep))
3718 ustepi = int_cst_value (ustep);
3720 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3721 return INFTY;
3723 else
3725 tree rat;
3727 rat = constant_multiple_of (utype, ustep, cstep);
3729 if (!rat)
3730 return INFTY;
3732 if (cst_and_fits_in_hwi (rat))
3733 ratio = int_cst_value (rat);
3734 else if (integer_onep (rat))
3735 ratio = 1;
3736 else if (integer_all_onesp (rat))
3737 ratio = -1;
3738 else
3739 return INFTY;
3742 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3743 or ratio == 1, it is better to handle this like
3745 ubase - ratio * cbase + ratio * var
3747 (this also holds in the case ratio == -1; TODO). */
3749 if (cst_and_fits_in_hwi (cbase))
3751 offset = - ratio * int_cst_value (cbase);
3752 cost += difference_cost (data,
3753 ubase, integer_zero_node,
3754 &symbol_present, &var_present, &offset,
3755 depends_on);
3757 else if (ratio == 1)
3759 cost += difference_cost (data,
3760 ubase, cbase,
3761 &symbol_present, &var_present, &offset,
3762 depends_on);
3764 else
3766 cost += force_var_cost (data, cbase, depends_on);
3767 cost += add_cost (TYPE_MODE (ctype));
3768 cost += difference_cost (data,
3769 ubase, integer_zero_node,
3770 &symbol_present, &var_present, &offset,
3771 depends_on);
3774 /* If we are after the increment, the value of the candidate is higher by
3775 one iteration. */
3776 if (stmt_after_increment (data->current_loop, cand, at))
3777 offset -= ratio * cstepi;
3779 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3780 (symbol/var/const parts may be omitted). If we are looking for an address,
3781 find the cost of addressing this. */
3782 if (address_p)
3783 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3785 /* Otherwise estimate the costs for computing the expression. */
3786 aratio = ratio > 0 ? ratio : -ratio;
3787 if (!symbol_present && !var_present && !offset)
3789 if (ratio != 1)
3790 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3792 return cost;
3795 if (aratio != 1)
3796 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3798 n_sums = 1;
3799 if (var_present
3800 /* Symbol + offset should be compile-time computable. */
3801 && (symbol_present || offset))
3802 n_sums++;
3804 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3806 fallback:
3808 /* Just get the expression, expand it and measure the cost. */
3809 tree comp = get_computation_at (data->current_loop, use, cand, at);
3811 if (!comp)
3812 return INFTY;
3814 if (address_p)
3815 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3817 return computation_cost (comp);
3821 /* Determines the cost of the computation by which USE is expressed
3822 from induction variable CAND. If ADDRESS_P is true, we just need
3823 to create an address from it, otherwise we want to get it into a
3824 register. A set of invariants we depend on is stored in
3825 DEPENDS_ON. */
3827 static unsigned
3828 get_computation_cost (struct ivopts_data *data,
3829 struct iv_use *use, struct iv_cand *cand,
3830 bool address_p, bitmap *depends_on)
3832 return get_computation_cost_at (data,
3833 use, cand, address_p, depends_on, use->stmt);
3836 /* Determines cost of basing replacement of USE on CAND in a generic
3837 expression. */
3839 static bool
3840 determine_use_iv_cost_generic (struct ivopts_data *data,
3841 struct iv_use *use, struct iv_cand *cand)
3843 bitmap depends_on;
3844 unsigned cost;
3846 /* The simple case first -- if we need to express the value of the preserved
3847 original biv, the cost is 0. This also prevents us from counting the
3848 cost of the increment twice -- once at this use and once in the cost of
3849 the candidate. */
3850 if (cand->pos == IP_ORIGINAL
3851 && cand->incremented_at == use->stmt)
3853 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3854 return true;
3857 cost = get_computation_cost (data, use, cand, false, &depends_on);
3858 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3860 return cost != INFTY;
3863 /* Determines cost of basing replacement of USE on CAND in an address. */
3865 static bool
3866 determine_use_iv_cost_address (struct ivopts_data *data,
3867 struct iv_use *use, struct iv_cand *cand)
3869 bitmap depends_on;
3870 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3872 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3874 return cost != INFTY;
3877 /* Computes value of induction variable IV in iteration NITER. */
3879 static tree
3880 iv_value (struct iv *iv, tree niter)
3882 tree val;
3883 tree type = TREE_TYPE (iv->base);
3885 niter = fold_convert (type, niter);
3886 val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3888 return fold_build2 (PLUS_EXPR, type, iv->base, val);
3891 /* Computes value of candidate CAND at position AT in iteration NITER. */
3893 static tree
3894 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3896 tree val = iv_value (cand->iv, niter);
3897 tree type = TREE_TYPE (cand->iv->base);
3899 if (stmt_after_increment (loop, cand, at))
3900 val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3902 return val;
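/* For illustration: a candidate with {base = b, step = s} has the value
   b + s * niter in iteration NITER, and b + s * (niter + 1) when AT is
   after the increment -- the extra step added by the PLUS_EXPR above.  */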
3907 /* Returns the period of induction variable IV. */
3907 static tree
3908 iv_period (struct iv *iv)
3910 tree step = iv->step, period, type;
3911 tree pow2div;
3913 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3915 /* The period of the iv is the type range divided by gcd (step, type range).
3916 Since the type range is a power of two, it suffices to determine the
3917 maximum power of two that divides the step. */
3918 pow2div = num_ending_zeros (step);
3919 type = unsigned_type_for (TREE_TYPE (step));
3921 period = build_low_bits_mask (type,
3922 (TYPE_PRECISION (type)
3923 - tree_low_cst (pow2div, 1)));
3925 return period;
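/* Worked example: for an 8-bit unsigned iv with step 12, num_ending_zeros
   gives 2 (12 == 4 * 3), so the mask built above is 2^(8 - 2) - 1 == 63;
   the iv takes 64 distinct values before wrapping, and the value returned
   is that period minus one, matching the GE test in may_eliminate_iv.  */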
3928 /* Returns the comparison operator used when eliminating the iv USE. */
3930 static enum tree_code
3931 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3933 struct loop *loop = data->current_loop;
3934 basic_block ex_bb;
3935 edge exit;
3937 ex_bb = bb_for_stmt (use->stmt);
3938 exit = EDGE_SUCC (ex_bb, 0);
3939 if (flow_bb_inside_loop_p (loop, exit->dest))
3940 exit = EDGE_SUCC (ex_bb, 1);
3942 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3945 /* Checks whether it is possible to express the condition in USE by a comparison
3946 of candidate CAND. If so, stores the value to compare against in *BOUND. */
3948 static bool
3949 may_eliminate_iv (struct ivopts_data *data,
3950 struct iv_use *use, struct iv_cand *cand, tree *bound)
3952 basic_block ex_bb;
3953 edge exit;
3954 tree nit, nit_type;
3955 tree wider_type, period, per_type;
3956 struct loop *loop = data->current_loop;
3958 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3959 return false;
3961 /* For now this works only for exits that dominate the loop latch. TODO --
3962 extend it to other conditions inside the loop body. */
3963 ex_bb = bb_for_stmt (use->stmt);
3964 if (use->stmt != last_stmt (ex_bb)
3965 || TREE_CODE (use->stmt) != COND_EXPR)
3966 return false;
3967 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3968 return false;
3970 exit = EDGE_SUCC (ex_bb, 0);
3971 if (flow_bb_inside_loop_p (loop, exit->dest))
3972 exit = EDGE_SUCC (ex_bb, 1);
3973 if (flow_bb_inside_loop_p (loop, exit->dest))
3974 return false;
3976 nit = niter_for_exit (data, exit);
3977 if (!nit)
3978 return false;
3980 nit_type = TREE_TYPE (nit);
3982 /* Determine whether we may use the variable to test whether niter iterations
3983 elapsed. This is the case iff the period of the induction variable is greater
3984 than the number of iterations; iv_period returns the period minus one, whence the GE test below. */
3985 period = iv_period (cand->iv);
3986 if (!period)
3987 return false;
3988 per_type = TREE_TYPE (period);
3991 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3992 wider_type = per_type;
3993 else
3994 wider_type = nit_type;
3996 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3997 fold_convert (wider_type, period),
3998 fold_convert (wider_type, nit))))
3999 return false;
4001 *bound = cand_value_at (loop, cand, use->stmt, nit);
4002 return true;
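/* A hypothetical elimination enabled by the test above: in

     for (i = 0; i < n; i++)  { ... p = base + 4 * i ... }

   the exit test on i can be replaced by comparing p against its final
   value (computed by cand_value_at), provided p's iv does not wrap within
   the n iterations -- exactly what the period check above verifies.  */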
4005 /* Determines cost of basing replacement of USE on CAND in a condition. */
4007 static bool
4008 determine_use_iv_cost_condition (struct ivopts_data *data,
4009 struct iv_use *use, struct iv_cand *cand)
4011 tree bound = NULL_TREE, op, cond;
4012 bitmap depends_on = NULL;
4013 unsigned cost;
4015 /* Only consider real candidates. */
4016 if (!cand->iv)
4018 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4019 return false;
4022 if (may_eliminate_iv (data, use, cand, &bound))
4024 cost = force_var_cost (data, bound, &depends_on);
4026 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4027 return cost != INFTY;
4030 /* The induction variable elimination failed; just express the original
4031 iv. If it is compared with an invariant, note that we cannot get
4032 rid of it. */
4033 cost = get_computation_cost (data, use, cand, false, &depends_on);
4035 cond = *use->op_p;
4036 if (TREE_CODE (cond) != SSA_NAME)
4038 op = TREE_OPERAND (cond, 0);
4039 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4040 op = TREE_OPERAND (cond, 1);
4041 if (TREE_CODE (op) == SSA_NAME)
4043 op = get_iv (data, op)->base;
4044 fd_ivopts_data = data;
4045 walk_tree (&op, find_depends, &depends_on, NULL);
4049 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4050 return cost != INFTY;
4053 /* Determines cost of basing replacement of USE on CAND. Returns false
4054 if USE cannot be based on CAND. */
4056 static bool
4057 determine_use_iv_cost (struct ivopts_data *data,
4058 struct iv_use *use, struct iv_cand *cand)
4060 switch (use->type)
4062 case USE_NONLINEAR_EXPR:
4063 return determine_use_iv_cost_generic (data, use, cand);
4065 case USE_ADDRESS:
4066 return determine_use_iv_cost_address (data, use, cand);
4068 case USE_COMPARE:
4069 return determine_use_iv_cost_condition (data, use, cand);
4071 default:
4072 gcc_unreachable ();
4076 /* Determines costs of basing the use of the iv on an iv candidate. */
4078 static void
4079 determine_use_iv_costs (struct ivopts_data *data)
4081 unsigned i, j;
4082 struct iv_use *use;
4083 struct iv_cand *cand;
4084 bitmap to_clear = BITMAP_ALLOC (NULL);
4086 alloc_use_cost_map (data);
4088 for (i = 0; i < n_iv_uses (data); i++)
4090 use = iv_use (data, i);
4092 if (data->consider_all_candidates)
4094 for (j = 0; j < n_iv_cands (data); j++)
4096 cand = iv_cand (data, j);
4097 determine_use_iv_cost (data, use, cand);
4100 else
4102 bitmap_iterator bi;
4104 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4106 cand = iv_cand (data, j);
4107 if (!determine_use_iv_cost (data, use, cand))
4108 bitmap_set_bit (to_clear, j);
4111 /* Remove the candidates for which the cost is infinite from
4112 the list of related candidates. */
4113 bitmap_and_compl_into (use->related_cands, to_clear);
4114 bitmap_clear (to_clear);
4118 BITMAP_FREE (to_clear);
4120 if (dump_file && (dump_flags & TDF_DETAILS))
4122 fprintf (dump_file, "Use-candidate costs:\n");
4124 for (i = 0; i < n_iv_uses (data); i++)
4126 use = iv_use (data, i);
4128 fprintf (dump_file, "Use %d:\n", i);
4129 fprintf (dump_file, " cand\tcost\tdepends on\n");
4130 for (j = 0; j < use->n_map_members; j++)
4132 if (!use->cost_map[j].cand
4133 || use->cost_map[j].cost == INFTY)
4134 continue;
4136 fprintf (dump_file, " %d\t%d\t",
4137 use->cost_map[j].cand->id,
4138 use->cost_map[j].cost);
4139 if (use->cost_map[j].depends_on)
4140 bitmap_print (dump_file,
4141 use->cost_map[j].depends_on, "","");
4142 fprintf (dump_file, "\n");
4145 fprintf (dump_file, "\n");
4147 fprintf (dump_file, "\n");
4151 /* Determines cost of the candidate CAND. */
4153 static void
4154 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4156 unsigned cost_base, cost_step;
4157 tree base;
4159 if (!cand->iv)
4161 cand->cost = 0;
4162 return;
4165 /* There are two costs associated with the candidate -- its increment
4166 and its initialization. The second is almost negligible for any loop
4167 that rolls enough, so we give it only a small weight. */
4169 base = cand->iv->base;
4170 cost_base = force_var_cost (data, base, NULL);
4171 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4173 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4175 /* Prefer the original iv unless we may gain something by replacing it;
4176 this is not really relevant for artificial ivs created by other
4177 passes. */
4178 if (cand->pos == IP_ORIGINAL
4179 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4180 cand->cost--;
4182 /* Prefer not to insert statements into the latch unless there are some
4183 already (so that we do not create unnecessary jumps). */
4184 if (cand->pos == IP_END
4185 && empty_block_p (ip_end_pos (data->current_loop)))
4186 cand->cost++;
4189 /* Determines costs of computation of the candidates. */
4191 static void
4192 determine_iv_costs (struct ivopts_data *data)
4194 unsigned i;
4196 if (dump_file && (dump_flags & TDF_DETAILS))
4198 fprintf (dump_file, "Candidate costs:\n");
4199 fprintf (dump_file, " cand\tcost\n");
4202 for (i = 0; i < n_iv_cands (data); i++)
4204 struct iv_cand *cand = iv_cand (data, i);
4206 determine_iv_cost (data, cand);
4208 if (dump_file && (dump_flags & TDF_DETAILS))
4209 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4212 if (dump_file && (dump_flags & TDF_DETAILS))
4213 fprintf (dump_file, "\n");
4216 /* Calculates cost for having SIZE induction variables. */
4218 static unsigned
4219 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4221 return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
4224 /* For each size of the induction variable set determine the penalty. */
4226 static void
4227 determine_set_costs (struct ivopts_data *data)
4229 unsigned j, n;
4230 tree phi, op;
4231 struct loop *loop = data->current_loop;
4232 bitmap_iterator bi;
4234 /* We use the following model (definitely improvable, especially the
4235 cost function -- TODO):
4237 We estimate the number of registers available (using MD data), name it A.
4239 We estimate the number of registers used by the loop, name it U. This
4240 number is obtained as the number of loop phi nodes (not counting virtual
4241 registers and bivs) + the number of variables from outside of the loop.
4243 We set a reserve R (free regs that are used for temporary computations,
4244 etc.). For now the reserve is a constant 3.
4246 Let I be the number of induction variables.
4248 -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to
4249 encourage making a lot of ivs without reason).
4250 -- if A - R < U + I <= A, the cost is I * PRES_COST
4251 -- if U + I > A, the cost is I * PRES_COST and
4252 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
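/* A worked instance of the model above (the numbers are illustrative):
   with A == 6, R == 3, U == 2 and I == 1, U + I + R <= A and the cost is
   1 * SMALL_COST. With I == 3, A - R < U + I <= A and the cost is
   3 * PRES_COST. With I == 5, U + I == 7 > A, so to 5 * PRES_COST we add
   uses * SPILL_COST * (7 - 6) / 7.  */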
4254 if (dump_file && (dump_flags & TDF_DETAILS))
4256 fprintf (dump_file, "Global costs:\n");
4257 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4258 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4259 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4260 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4263 n = 0;
4264 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4266 op = PHI_RESULT (phi);
4268 if (!is_gimple_reg (op))
4269 continue;
4271 if (get_iv (data, op))
4272 continue;
4274 n++;
4277 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4279 struct version_info *info = ver_info (data, j);
4281 if (info->inv_id && info->has_nonlin_use)
4282 n++;
4285 data->regs_used = n;
4286 if (dump_file && (dump_flags & TDF_DETAILS))
4287 fprintf (dump_file, " regs_used %d\n", n);
4289 if (dump_file && (dump_flags & TDF_DETAILS))
4291 fprintf (dump_file, " cost for size:\n");
4292 fprintf (dump_file, " ivs\tcost\n");
4293 for (j = 0; j <= 2 * target_avail_regs; j++)
4294 fprintf (dump_file, " %d\t%d\n", j,
4295 ivopts_global_cost_for_size (data, j));
4296 fprintf (dump_file, "\n");
4300 /* Returns true if A is a cheaper cost pair than B. */
4302 static bool
4303 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4305 if (!a)
4306 return false;
4308 if (!b)
4309 return true;
4311 if (a->cost < b->cost)
4312 return true;
4314 if (a->cost > b->cost)
4315 return false;
4317 /* In case the costs are the same, prefer the cheaper candidate. */
4318 if (a->cand->cost < b->cand->cost)
4319 return true;
4321 return false;
4324 /* Computes the cost field of IVS structure. */
4326 static void
4327 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4329 unsigned cost = 0;
4331 cost += ivs->cand_use_cost;
4332 cost += ivs->cand_cost;
4333 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4335 ivs->cost = cost;
4338 /* Remove invariants in set INVS from set IVS. */
4340 static void
4341 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4343 bitmap_iterator bi;
4344 unsigned iid;
4346 if (!invs)
4347 return;
4349 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4351 ivs->n_invariant_uses[iid]--;
4352 if (ivs->n_invariant_uses[iid] == 0)
4353 ivs->n_regs--;
4357 /* Set USE not to be expressed by any candidate in IVS. */
4359 static void
4360 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4361 struct iv_use *use)
4363 unsigned uid = use->id, cid;
4364 struct cost_pair *cp;
4366 cp = ivs->cand_for_use[uid];
4367 if (!cp)
4368 return;
4369 cid = cp->cand->id;
4371 ivs->bad_uses++;
4372 ivs->cand_for_use[uid] = NULL;
4373 ivs->n_cand_uses[cid]--;
4375 if (ivs->n_cand_uses[cid] == 0)
4377 bitmap_clear_bit (ivs->cands, cid);
4378 /* Do not count the pseudocandidates. */
4379 if (cp->cand->iv)
4380 ivs->n_regs--;
4381 ivs->n_cands--;
4382 ivs->cand_cost -= cp->cand->cost;
4384 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4387 ivs->cand_use_cost -= cp->cost;
4389 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4390 iv_ca_recount_cost (data, ivs);
4393 /* Add invariants in set INVS to set IVS. */
4395 static void
4396 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4398 bitmap_iterator bi;
4399 unsigned iid;
4401 if (!invs)
4402 return;
4404 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4406 ivs->n_invariant_uses[iid]++;
4407 if (ivs->n_invariant_uses[iid] == 1)
4408 ivs->n_regs++;
4412 /* Set cost pair for USE in set IVS to CP. */
4414 static void
4415 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4416 struct iv_use *use, struct cost_pair *cp)
4418 unsigned uid = use->id, cid;
4420 if (ivs->cand_for_use[uid] == cp)
4421 return;
4423 if (ivs->cand_for_use[uid])
4424 iv_ca_set_no_cp (data, ivs, use);
4426 if (cp)
4428 cid = cp->cand->id;
4430 ivs->bad_uses--;
4431 ivs->cand_for_use[uid] = cp;
4432 ivs->n_cand_uses[cid]++;
4433 if (ivs->n_cand_uses[cid] == 1)
4435 bitmap_set_bit (ivs->cands, cid);
4436 /* Do not count the pseudocandidates. */
4437 if (cp->cand->iv)
4438 ivs->n_regs++;
4439 ivs->n_cands++;
4440 ivs->cand_cost += cp->cand->cost;
4442 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4445 ivs->cand_use_cost += cp->cost;
4446 iv_ca_set_add_invariants (ivs, cp->depends_on);
4447 iv_ca_recount_cost (data, ivs);
4451 /* Extend set IVS by expressing USE by some of the candidates in it
4452 if possible. */
4454 static void
4455 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4456 struct iv_use *use)
4458 struct cost_pair *best_cp = NULL, *cp;
4459 bitmap_iterator bi;
4460 unsigned i;
4462 gcc_assert (ivs->upto >= use->id);
4464 if (ivs->upto == use->id)
4466 ivs->upto++;
4467 ivs->bad_uses++;
4470 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4472 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4474 if (cheaper_cost_pair (cp, best_cp))
4475 best_cp = cp;
4478 iv_ca_set_cp (data, ivs, use, best_cp);
4481 /* Get the cost of assignment IVS, or INFTY if some use has no candidate. */
4483 static unsigned
4484 iv_ca_cost (struct iv_ca *ivs)
4486 return (ivs->bad_uses ? INFTY : ivs->cost);
4489 /* Returns true if all dependences of CP are among invariants in IVS. */
4491 static bool
4492 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4494 unsigned i;
4495 bitmap_iterator bi;
4497 if (!cp->depends_on)
4498 return true;
4500 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4502 if (ivs->n_invariant_uses[i] == 0)
4503 return false;
4506 return true;
4509 /* Creates a record of the change that expresses USE by NEW_CP instead of
4510 OLD_CP, and chains it before NEXT_CHANGE. */
4512 static struct iv_ca_delta *
4513 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4514 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4516 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4518 change->use = use;
4519 change->old_cp = old_cp;
4520 change->new_cp = new_cp;
4521 change->next_change = next_change;
4523 return change;
4526 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4527 are rewritten. */
4529 static struct iv_ca_delta *
4530 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4532 struct iv_ca_delta *last;
4534 if (!l2)
4535 return l1;
4537 if (!l1)
4538 return l2;
4540 for (last = l1; last->next_change; last = last->next_change)
4541 continue;
4542 last->next_change = l2;
4544 return l1;
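/* Illustrative sketch, not part of the pass: a delta is an ordinary
   singly-linked list of changes.  iv_ca_delta_add pushes on the front in
   O(1); iv_ca_delta_join walks to the tail of L1 and splices L2 there.
   The same shape with a hypothetical generic node type (assuming
   <stdlib.h>; error handling elided):  */

struct toy_node { int payload; struct toy_node *next; };

static struct toy_node *
toy_push (int payload, struct toy_node *rest)
{
  struct toy_node *n = (struct toy_node *) malloc (sizeof *n);
  n->payload = payload;
  n->next = rest;		/* The new node becomes the head.  */
  return n;
}

static struct toy_node *
toy_join (struct toy_node *l1, struct toy_node *l2)
{
  struct toy_node *last;
  if (!l2)
    return l1;
  if (!l1)
    return l2;
  for (last = l1; last->next; last = last->next)
    continue;			/* Find the tail of L1...  */
  last->next = l2;		/* ...and hang L2 off it.  */
  return l1;
}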
4547 /* Returns the cost pair through which USE is expressed in IVS. */
4549 static struct cost_pair *
4550 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4552 return ivs->cand_for_use[use->id];
4555 /* Reverse the list of changes DELTA, forming its inverse. */
4557 static struct iv_ca_delta *
4558 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4560 struct iv_ca_delta *act, *next, *prev = NULL;
4561 struct cost_pair *tmp;
4563 for (act = delta; act; act = next)
4565 next = act->next_change;
4566 act->next_change = prev;
4567 prev = act;
4569 tmp = act->old_cp;
4570 act->old_cp = act->new_cp;
4571 act->new_cp = tmp;
4574 return prev;
4577 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4578 reverted instead. */
4580 static void
4581 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4582 struct iv_ca_delta *delta, bool forward)
4584 struct cost_pair *from, *to;
4585 struct iv_ca_delta *act;
4587 if (!forward)
4588 delta = iv_ca_delta_reverse (delta);
4590 for (act = delta; act; act = act->next_change)
4592 from = act->old_cp;
4593 to = act->new_cp;
4594 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4595 iv_ca_set_cp (data, ivs, act->use, to);
4598 if (!forward)
4599 iv_ca_delta_reverse (delta);
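/* Illustrative sketch, not part of the pass: committing a delta walks it
   head to tail installing the new cost pairs; reverting walks the reversed
   list with old and new swapped, so changes are undone in the opposite
   order to which they were applied.  The toy functions below, over a
   hypothetical array-of-ints "state", show the same apply/undo-in-reverse
   discipline.  */

struct toy_change { int slot, old_val, new_val; struct toy_change *next; };

static void
toy_apply (int *state, struct toy_change *delta)
{
  for (; delta; delta = delta->next)
    state[delta->slot] = delta->new_val;	/* Head first.  */
}

static void
toy_revert (int *state, struct toy_change *delta)
{
  if (!delta)
    return;
  toy_revert (state, delta->next);	/* Undo later changes first,  */
  state[delta->slot] = delta->old_val;	/* then this one.  */
}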
4602 /* Returns true if CAND is used in IVS. */
4604 static bool
4605 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4607 return ivs->n_cand_uses[cand->id] > 0;
4610 /* Returns number of induction variable candidates in the set IVS. */
4612 static unsigned
4613 iv_ca_n_cands (struct iv_ca *ivs)
4615 return ivs->n_cands;
4618 /* Free the list of changes DELTA. */
4620 static void
4621 iv_ca_delta_free (struct iv_ca_delta **delta)
4623 struct iv_ca_delta *act, *next;
4625 for (act = *delta; act; act = next)
4627 next = act->next_change;
4628 free (act);
4631 *delta = NULL;
4634 /* Allocates a new assignment of iv candidates to uses. */
4636 static struct iv_ca *
4637 iv_ca_new (struct ivopts_data *data)
4639 struct iv_ca *nw = XNEW (struct iv_ca);
4641 nw->upto = 0;
4642 nw->bad_uses = 0;
4643 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4644 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4645 nw->cands = BITMAP_ALLOC (NULL);
4646 nw->n_cands = 0;
4647 nw->n_regs = 0;
4648 nw->cand_use_cost = 0;
4649 nw->cand_cost = 0;
4650 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4651 nw->cost = 0;
4653 return nw;
4656 /* Free memory occupied by the set IVS. */
4658 static void
4659 iv_ca_free (struct iv_ca **ivs)
4661 free ((*ivs)->cand_for_use);
4662 free ((*ivs)->n_cand_uses);
4663 BITMAP_FREE ((*ivs)->cands);
4664 free ((*ivs)->n_invariant_uses);
4665 free (*ivs);
4666 *ivs = NULL;
4669 /* Dumps IVS to FILE. */
4671 static void
4672 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4674 const char *pref = " invariants ";
4675 unsigned i;
4677 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4678 bitmap_print (file, ivs->cands, " candidates ","\n");
4680 for (i = 1; i <= data->max_inv_id; i++)
4681 if (ivs->n_invariant_uses[i])
4683 fprintf (file, "%s%d", pref, i);
4684 pref = ", ";
4686 fprintf (file, "\n");
4689 /* Try switching each use in IVS to candidate CAND where that is cheaper.
4690 Return the cost of the new set, and store the differences in DELTA. The
4691 number of induction variables in the new set is stored in N_IVS. */
4693 static unsigned
4694 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4695 struct iv_cand *cand, struct iv_ca_delta **delta,
4696 unsigned *n_ivs)
4698 unsigned i, cost;
4699 struct iv_use *use;
4700 struct cost_pair *old_cp, *new_cp;
4702 *delta = NULL;
4703 for (i = 0; i < ivs->upto; i++)
4705 use = iv_use (data, i);
4706 old_cp = iv_ca_cand_for_use (ivs, use);
4708 if (old_cp
4709 && old_cp->cand == cand)
4710 continue;
4712 new_cp = get_use_iv_cost (data, use, cand);
4713 if (!new_cp)
4714 continue;
4716 if (!iv_ca_has_deps (ivs, new_cp))
4717 continue;
4719 if (!cheaper_cost_pair (new_cp, old_cp))
4720 continue;
4722 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4725 iv_ca_delta_commit (data, ivs, *delta, true);
4726 cost = iv_ca_cost (ivs);
4727 if (n_ivs)
4728 *n_ivs = iv_ca_n_cands (ivs);
4729 iv_ca_delta_commit (data, ivs, *delta, false);
4731 return cost;
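/* Illustrative sketch, not part of the pass: iv_ca_extend evaluates a move
   without keeping it -- commit the delta, read off the cost, revert.  The
   caller then decides whether to re-commit the delta for real.  The same
   probe, phrased with the toy helpers introduced above and a hypothetical
   cost callback:  */

static int
toy_probe_cost (int *state, struct toy_change *delta,
		int (*cost_fn) (const int *))
{
  int cost;

  toy_apply (state, delta);	/* Tentatively apply the changes...  */
  cost = cost_fn (state);	/* ...observe the resulting cost...  */
  toy_revert (state, delta);	/* ...and restore the original state.  */
  return cost;
}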
4734 /* Try narrowing set IVS by removing CAND. Return the cost of
4735 the new set and store the differences in DELTA. */
4737 static unsigned
4738 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4739 struct iv_cand *cand, struct iv_ca_delta **delta)
4741 unsigned i, ci;
4742 struct iv_use *use;
4743 struct cost_pair *old_cp, *new_cp, *cp;
4744 bitmap_iterator bi;
4745 struct iv_cand *cnd;
4746 unsigned cost;
4748 *delta = NULL;
4749 for (i = 0; i < n_iv_uses (data); i++)
4751 use = iv_use (data, i);
4753 old_cp = iv_ca_cand_for_use (ivs, use);
4754 if (old_cp->cand != cand)
4755 continue;
4757 new_cp = NULL;
4759 if (data->consider_all_candidates)
4761 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4763 if (ci == cand->id)
4764 continue;
4766 cnd = iv_cand (data, ci);
4768 cp = get_use_iv_cost (data, use, cnd);
4769 if (!cp)
4770 continue;
4771 if (!iv_ca_has_deps (ivs, cp))
4772 continue;
4774 if (!cheaper_cost_pair (cp, new_cp))
4775 continue;
4777 new_cp = cp;
4780 else
4782 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4784 if (ci == cand->id)
4785 continue;
4787 cnd = iv_cand (data, ci);
4789 cp = get_use_iv_cost (data, use, cnd);
4790 if (!cp)
4791 continue;
4792 if (!iv_ca_has_deps (ivs, cp))
4793 continue;
4795 if (!cheaper_cost_pair (cp, new_cp))
4796 continue;
4798 new_cp = cp;
4802 if (!new_cp)
4804 iv_ca_delta_free (delta);
4805 return INFTY;
4808 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4811 iv_ca_delta_commit (data, ivs, *delta, true);
4812 cost = iv_ca_cost (ivs);
4813 iv_ca_delta_commit (data, ivs, *delta, false);
4815 return cost;
4818 /* Try optimizing the set of candidates IVS by removing candidates other
4819 than EXCEPT_CAND from it. Return the cost of the new set, and store the
4820 differences in DELTA. */
4822 static unsigned
4823 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4824 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4826 bitmap_iterator bi;
4827 struct iv_ca_delta *act_delta, *best_delta;
4828 unsigned i, best_cost, acost;
4829 struct iv_cand *cand;
4831 best_delta = NULL;
4832 best_cost = iv_ca_cost (ivs);
4834 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4836 cand = iv_cand (data, i);
4838 if (cand == except_cand)
4839 continue;
4841 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4843 if (acost < best_cost)
4845 best_cost = acost;
4846 iv_ca_delta_free (&best_delta);
4847 best_delta = act_delta;
4849 else
4850 iv_ca_delta_free (&act_delta);
4853 if (!best_delta)
4855 *delta = NULL;
4856 return best_cost;
4859 /* Recurse to possibly remove other unnecessary ivs. */
4860 iv_ca_delta_commit (data, ivs, best_delta, true);
4861 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4862 iv_ca_delta_commit (data, ivs, best_delta, false);
4863 *delta = iv_ca_delta_join (best_delta, *delta);
4864 return best_cost;
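/* Illustrative sketch, not part of the pass: pruning is a greedy descent.
   At each step every removable candidate is tried (via the probe pattern
   above), the single removal that lowers the cost the most is kept, and the
   process repeats until no removal helps -- the recursion in iv_ca_prune
   expresses exactly that loop.  A toy version over a hypothetical array of
   selected/unselected flags; cost_fn is expected to return a huge value
   when a removal makes the solution infeasible.  */

static int
toy_prune (int *selected, int n, int (*cost_fn) (const int *, int))
{
  int best = cost_fn (selected, n);
  int improved = 1;

  while (improved)
    {
      int i, best_i = -1;

      improved = 0;
      for (i = 0; i < n; i++)
	{
	  int c;

	  if (!selected[i])
	    continue;
	  selected[i] = 0;		/* Trial removal...  */
	  c = cost_fn (selected, n);
	  selected[i] = 1;		/* ...rolled back at once.  */
	  if (c < best)
	    {
	      best = c;
	      best_i = i;
	    }
	}
      if (best_i >= 0)
	{
	  selected[best_i] = 0;		/* Keep the best removal.  */
	  improved = 1;
	}
    }
  return best;
}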
4867 /* Tries to extend the set IVS in the best possible way in order
4868 to express USE. */
4870 static bool
4871 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4872 struct iv_use *use)
4874 unsigned best_cost, act_cost;
4875 unsigned i;
4876 bitmap_iterator bi;
4877 struct iv_cand *cand;
4878 struct iv_ca_delta *best_delta = NULL, *act_delta;
4879 struct cost_pair *cp;
4881 iv_ca_add_use (data, ivs, use);
4882 best_cost = iv_ca_cost (ivs);
4884 cp = iv_ca_cand_for_use (ivs, use);
4885 if (cp)
4887 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4888 iv_ca_set_no_cp (data, ivs, use);
4891 /* First try the important candidates; only if that fails, try the
4892 specific ones. (This two-tier search is sketched schematically after
4893 this function.) Rationale -- in loops with many variables the best
4894 choice often is to use just one generic biv. If we added many
4895 use-specific ivs here, the optimization algorithm would later be likely
4896 to get stuck in a local minimum, thus causing us to create too many
4897 ivs. Working from few ivs towards more seems more likely to succeed --
4898 starting from few ivs, replacing an expensive use by a specific iv should always be a win. */
4899 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4901 cand = iv_cand (data, i);
4903 if (iv_ca_cand_used_p (ivs, cand))
4904 continue;
4906 cp = get_use_iv_cost (data, use, cand);
4907 if (!cp)
4908 continue;
4910 iv_ca_set_cp (data, ivs, use, cp);
4911 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4912 iv_ca_set_no_cp (data, ivs, use);
4913 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4915 if (act_cost < best_cost)
4917 best_cost = act_cost;
4919 iv_ca_delta_free (&best_delta);
4920 best_delta = act_delta;
4922 else
4923 iv_ca_delta_free (&act_delta);
4926 if (best_cost == INFTY)
4928 for (i = 0; i < use->n_map_members; i++)
4930 cp = use->cost_map + i;
4931 cand = cp->cand;
4932 if (!cand)
4933 continue;
4935 /* Already tried this. */
4936 if (cand->important)
4937 continue;
4939 if (iv_ca_cand_used_p (ivs, cand))
4940 continue;
4942 act_delta = NULL;
4943 iv_ca_set_cp (data, ivs, use, cp);
4944 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4945 iv_ca_set_no_cp (data, ivs, use);
4946 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4947 cp, act_delta);
4949 if (act_cost < best_cost)
4951 best_cost = act_cost;
4953 if (best_delta)
4954 iv_ca_delta_free (&best_delta);
4955 best_delta = act_delta;
4957 else
4958 iv_ca_delta_free (&act_delta);
4962 iv_ca_delta_commit (data, ivs, best_delta, true);
4963 iv_ca_delta_free (&best_delta);
4965 return (best_cost != INFTY);
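/* Illustrative sketch, not part of the pass (the schematic two-tier search
   promised in the comment inside try_add_cand_for): candidates come in two
   tiers, and the generic tier is exhausted before the use-specific tier is
   even looked at.  With hypothetical id arrays and a cost callback:  */

static int
toy_two_tier_pick (const int *generic, int n_generic,
		   const int *specific, int n_specific,
		   int (*cost_of) (int), int inf)
{
  int i, c, best = inf, best_id = -1;

  for (i = 0; i < n_generic; i++)	/* Tier 1: generic candidates.  */
    {
      c = cost_of (generic[i]);
      if (c < best)
	{
	  best = c;
	  best_id = generic[i];
	}
    }
  if (best == inf)			/* Tier 2: only on failure.  */
    for (i = 0; i < n_specific; i++)
      {
	c = cost_of (specific[i]);
	if (c < best)
	  {
	    best = c;
	    best_id = specific[i];
	  }
      }
  return best_id;			/* -1 if nothing works at all.  */
}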
4968 /* Finds an initial assignment of candidates to uses. */
4970 static struct iv_ca *
4971 get_initial_solution (struct ivopts_data *data)
4973 struct iv_ca *ivs = iv_ca_new (data);
4974 unsigned i;
4976 for (i = 0; i < n_iv_uses (data); i++)
4977 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4979 iv_ca_free (&ivs);
4980 return NULL;
4983 return ivs;
4986 /* Tries to improve the set of induction variables IVS. */
4988 static bool
4989 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4991 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4992 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4993 struct iv_cand *cand;
4995 /* Try extending the set of induction variables by one. */
4996 for (i = 0; i < n_iv_cands (data); i++)
4998 cand = iv_cand (data, i);
5000 if (iv_ca_cand_used_p (ivs, cand))
5001 continue;
5003 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5004 if (!act_delta)
5005 continue;
5007 /* If we successfully added the candidate and the set is small enough,
5008 try optimizing it by removing other candidates. */
5009 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5011 iv_ca_delta_commit (data, ivs, act_delta, true);
5012 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5013 iv_ca_delta_commit (data, ivs, act_delta, false);
5014 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5017 if (acost < best_cost)
5019 best_cost = acost;
5020 iv_ca_delta_free (&best_delta);
5021 best_delta = act_delta;
5023 else
5024 iv_ca_delta_free (&act_delta);
5027 if (!best_delta)
5029 /* Try removing the candidates from the set instead. */
5030 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5032 /* Nothing more we can do. */
5033 if (!best_delta)
5034 return false;
5037 iv_ca_delta_commit (data, ivs, best_delta, true);
5038 gcc_assert (best_cost == iv_ca_cost (ivs));
5039 iv_ca_delta_free (&best_delta);
5040 return true;
5043 /* Attempts to find the optimal set of induction variables, using a simple
5044 greedy heuristic: try to replace at most one candidate in the selected
5045 solution and remove unused ivs while this improves the cost. A skeleton of this search loop follows the function. */
5047 static struct iv_ca *
5048 find_optimal_iv_set (struct ivopts_data *data)
5050 unsigned i;
5051 struct iv_ca *set;
5052 struct iv_use *use;
5054 /* Get the initial solution. */
5055 set = get_initial_solution (data);
5056 if (!set)
5058 if (dump_file && (dump_flags & TDF_DETAILS))
5059 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5060 return NULL;
5063 if (dump_file && (dump_flags & TDF_DETAILS))
5065 fprintf (dump_file, "Initial set of candidates:\n");
5066 iv_ca_dump (data, dump_file, set);
5069 while (try_improve_iv_set (data, set))
5071 if (dump_file && (dump_flags & TDF_DETAILS))
5073 fprintf (dump_file, "Improved to:\n");
5074 iv_ca_dump (data, dump_file, set);
5078 if (dump_file && (dump_flags & TDF_DETAILS))
5079 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5081 for (i = 0; i < n_iv_uses (data); i++)
5083 use = iv_use (data, i);
5084 use->selected = iv_ca_cand_for_use (set, use)->cand;
5087 return set;
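/* Illustrative sketch, not part of the pass: find_optimal_iv_set is plain
   greedy local search -- take any feasible initial solution, then repeat
   single improving moves until none remains.  The skeleton, with
   hypothetical callbacks standing in for try_improve_iv_set and
   iv_ca_cost:  */

static int
toy_local_search (int *state, int n,
		  int (*improve_fn) (int *, int),
		  int (*cost_fn) (const int *, int))
{
  /* improve_fn applies one cost-reducing move and returns nonzero, or
     returns zero once the solution is locally optimal.  */
  while (improve_fn (state, n))
    continue;
  return cost_fn (state, n);
}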
5090 /* Creates a new induction variable corresponding to CAND. */
5092 static void
5093 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5095 block_stmt_iterator incr_pos;
5096 tree base;
5097 bool after = false;
5099 if (!cand->iv)
5100 return;
5102 switch (cand->pos)
5104 case IP_NORMAL:
5105 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5106 break;
5108 case IP_END:
5109 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5110 after = true;
5111 break;
5113 case IP_ORIGINAL:
5114 /* Mark that the iv is preserved. */
5115 name_info (data, cand->var_before)->preserve_biv = true;
5116 name_info (data, cand->var_after)->preserve_biv = true;
5118 /* Rewrite the increment so that it uses var_before directly. */
5119 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5121 return;
5124 gimple_add_tmp_var (cand->var_before);
5125 add_referenced_var (cand->var_before);
5127 base = unshare_expr (cand->iv->base);
5129 create_iv (base, unshare_expr (cand->iv->step),
5130 cand->var_before, data->current_loop,
5131 &incr_pos, after, &cand->var_before, &cand->var_after);
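/* Illustrative sketch, not part of the pass: the increment positions
   correspond, loosely, to where "i_after = i_before + step" lands in
   source terms.  The two hypothetical loops below both sum 0 .. n-1
   (for n > 0); the first increments just before the exit test (as with
   IP_NORMAL), the second at the end of the latch, after the test (as
   with IP_END).  */

static int
toy_ip_normal (int n)
{
  int i_before = 0, i_after, sum = 0;
  for (;;)
    {
      sum += i_before;
      i_after = i_before + 1;	/* Increment...  */
      if (i_after == n)		/* ...immediately before the exit test.  */
	break;
      i_before = i_after;
    }
  return sum;
}

static int
toy_ip_end (int n)
{
  int i = 0, sum = 0;
  while (i != n)		/* Exit test first...  */
    {
      sum += i;
      i = i + 1;		/* ...increment at the end of the latch.  */
    }
  return sum;
}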
5134 /* Creates new induction variables described in SET. */
5136 static void
5137 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5139 unsigned i;
5140 struct iv_cand *cand;
5141 bitmap_iterator bi;
5143 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5145 cand = iv_cand (data, i);
5146 create_new_iv (data, cand);
5150 /* Removes statement STMT (a real statement or a phi node). If
5151 INCLUDING_DEFINED_NAME is true, also remove the ssa name it defines. */
5153 static void
5154 remove_statement (tree stmt, bool including_defined_name)
5156 if (TREE_CODE (stmt) == PHI_NODE)
5158 if (!including_defined_name)
5160 /* Prevent the ssa name defined by the statement from being removed. */
5161 SET_PHI_RESULT (stmt, NULL);
5163 remove_phi_node (stmt, NULL_TREE);
5165 else
5167 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5169 bsi_remove (&bsi, true);
5173 /* Rewrites USE (the definition of an iv used in a nonlinear expression)
5174 using candidate CAND. */
5176 static void
5177 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5178 struct iv_use *use, struct iv_cand *cand)
5180 tree comp;
5181 tree op, stmts, tgt, ass;
5182 block_stmt_iterator bsi, pbsi;
5184 /* An important special case -- if we are asked to express the value of
5185 the original iv by itself, just exit; there is no need to
5186 introduce a new computation (that might also need casting the
5187 variable to unsigned and back). */
5188 if (cand->pos == IP_ORIGINAL
5189 && cand->incremented_at == use->stmt)
5191 tree step, ctype, utype;
5192 enum tree_code incr_code = PLUS_EXPR;
5194 gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5195 gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5197 step = cand->iv->step;
5198 ctype = TREE_TYPE (step);
5199 utype = TREE_TYPE (cand->var_after);
5200 if (TREE_CODE (step) == NEGATE_EXPR)
5202 incr_code = MINUS_EXPR;
5203 step = TREE_OPERAND (step, 0);
5206 /* Check whether we may leave the computation unchanged.
5207 This is the case only if it does not rely on other
5208 computations in the loop -- otherwise, the computation
5209 we rely upon may be removed in remove_unused_ivs,
5210 thus leading to an ICE. */
5211 op = TREE_OPERAND (use->stmt, 1);
5212 if (TREE_CODE (op) == PLUS_EXPR
5213 || TREE_CODE (op) == MINUS_EXPR)
5215 if (TREE_OPERAND (op, 0) == cand->var_before)
5216 op = TREE_OPERAND (op, 1);
5217 else if (TREE_CODE (op) == PLUS_EXPR
5218 && TREE_OPERAND (op, 1) == cand->var_before)
5219 op = TREE_OPERAND (op, 0);
5220 else
5221 op = NULL_TREE;
5223 else
5224 op = NULL_TREE;
5226 if (op
5227 && (TREE_CODE (op) == INTEGER_CST
5228 || operand_equal_p (op, step, 0)))
5229 return;
5231 /* Otherwise, add the necessary computations to express
5232 the iv. */
5233 op = fold_convert (ctype, cand->var_before);
5234 comp = fold_convert (utype,
5235 build2 (incr_code, ctype, op,
5236 unshare_expr (step)));
5238 else
5239 comp = get_computation (data->current_loop, use, cand);
5241 switch (TREE_CODE (use->stmt))
5243 case PHI_NODE:
5244 tgt = PHI_RESULT (use->stmt);
5246 /* If we should keep the biv, do not replace it. */
5247 if (name_info (data, tgt)->preserve_biv)
5248 return;
5250 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5251 while (!bsi_end_p (pbsi)
5252 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5254 bsi = pbsi;
5255 bsi_next (&pbsi);
5257 break;
5259 case MODIFY_EXPR:
5260 tgt = TREE_OPERAND (use->stmt, 0);
5261 bsi = bsi_for_stmt (use->stmt);
5262 break;
5264 default:
5265 gcc_unreachable ();
5268 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5270 if (TREE_CODE (use->stmt) == PHI_NODE)
5272 if (stmts)
5273 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5274 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5275 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5276 remove_statement (use->stmt, false);
5277 SSA_NAME_DEF_STMT (tgt) = ass;
5279 else
5281 if (stmts)
5282 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5283 TREE_OPERAND (use->stmt, 1) = op;
5287 /* Replaces the ssa name at index IDX by its base variable. Callback for
5288 for_each_index. */
5290 static bool
5291 idx_remove_ssa_names (tree base, tree *idx,
5292 void *data ATTRIBUTE_UNUSED)
5294 tree *op;
5296 if (TREE_CODE (*idx) == SSA_NAME)
5297 *idx = SSA_NAME_VAR (*idx);
5299 if (TREE_CODE (base) == ARRAY_REF)
5301 op = &TREE_OPERAND (base, 2);
5302 if (*op
5303 && TREE_CODE (*op) == SSA_NAME)
5304 *op = SSA_NAME_VAR (*op);
5305 op = &TREE_OPERAND (base, 3);
5306 if (*op
5307 && TREE_CODE (*op) == SSA_NAME)
5308 *op = SSA_NAME_VAR (*op);
5311 return true;
5314 /* Unshares REF and replaces ssa names inside it by their base variables. */
5316 static tree
5317 unshare_and_remove_ssa_names (tree ref)
5319 ref = unshare_expr (ref);
5320 for_each_index (&ref, idx_remove_ssa_names, NULL);
5322 return ref;
5325 /* Extract the alias analysis info for the memory reference REF. There are
5326 several ways this information may be stored, and its precise semantics
5327 depend on the type of the reference, but there is always one _DECL node
5328 hidden somewhere that is used to determine the set of virtual operands
5329 for the reference. The code below deciphers this jungle and extracts
5330 that single useful piece of information. */
5332 static tree
5333 get_ref_tag (tree ref, tree orig)
5335 tree var = get_base_address (ref);
5336 tree aref = NULL_TREE, tag, sv;
5337 HOST_WIDE_INT offset, size, maxsize;
5339 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5341 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5342 if (ref)
5343 break;
5346 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5347 return unshare_expr (sv);
5349 if (!var)
5350 return NULL_TREE;
5352 if (TREE_CODE (var) == INDIRECT_REF)
5354 /* If the base is a dereference of a pointer, first check its name memory
5355 tag. If it does not have one, use its symbol memory tag. */
5356 var = TREE_OPERAND (var, 0);
5357 if (TREE_CODE (var) != SSA_NAME)
5358 return NULL_TREE;
5360 if (SSA_NAME_PTR_INFO (var))
5362 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5363 if (tag)
5364 return tag;
5367 var = SSA_NAME_VAR (var);
5368 tag = var_ann (var)->symbol_mem_tag;
5369 gcc_assert (tag != NULL_TREE);
5370 return tag;
5372 else
5374 if (!DECL_P (var))
5375 return NULL_TREE;
5377 tag = var_ann (var)->symbol_mem_tag;
5378 if (tag)
5379 return tag;
5381 return var;
5385 /* Copies the reference information from OLD_REF to NEW_REF. */
5387 static void
5388 copy_ref_info (tree new_ref, tree old_ref)
5390 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5391 copy_mem_ref_info (new_ref, old_ref);
5392 else
5394 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5395 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5399 /* Rewrites USE (address that is an iv) using candidate CAND. */
5401 static void
5402 rewrite_use_address (struct ivopts_data *data,
5403 struct iv_use *use, struct iv_cand *cand)
5405 struct affine_tree_combination aff;
5406 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5407 tree ref;
5409 get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5410 unshare_aff_combination (&aff);
5412 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5413 copy_ref_info (ref, *use->op_p);
5414 *use->op_p = ref;
5417 /* Rewrites USE (a condition in which one of the arguments is an iv) using
5418 candidate CAND. */
5420 static void
5421 rewrite_use_compare (struct ivopts_data *data,
5422 struct iv_use *use, struct iv_cand *cand)
5424 tree comp;
5425 tree *op_p, cond, op, stmts, bound;
5426 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5427 enum tree_code compare;
5428 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5430 bound = cp->value;
5431 if (bound)
5433 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5434 tree var_type = TREE_TYPE (var);
5436 compare = iv_elimination_compare (data, use);
5437 bound = fold_convert (var_type, bound);
5438 op = force_gimple_operand (unshare_expr (bound), &stmts,
5439 true, NULL_TREE);
5441 if (stmts)
5442 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5444 *use->op_p = build2 (compare, boolean_type_node, var, op);
5445 update_stmt (use->stmt);
5446 return;
5449 /* The induction variable elimination failed; just express the original
5450 iv. */
5451 comp = get_computation (data->current_loop, use, cand);
5453 cond = *use->op_p;
5454 op_p = &TREE_OPERAND (cond, 0);
5455 if (TREE_CODE (*op_p) != SSA_NAME
5456 || zero_p (get_iv (data, *op_p)->step))
5457 op_p = &TREE_OPERAND (cond, 1);
5459 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5460 if (stmts)
5461 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5463 *op_p = op;
5466 /* Ensure that operand *OP_P may be used at the end of EXIT without
5467 violating loop closed ssa form. */
5469 static void
5470 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5472 basic_block def_bb;
5473 struct loop *def_loop;
5474 tree phi, use;
5476 use = USE_FROM_PTR (op_p);
5477 if (TREE_CODE (use) != SSA_NAME)
5478 return;
5480 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5481 if (!def_bb)
5482 return;
5484 def_loop = def_bb->loop_father;
5485 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5486 return;
5488 /* Try finding a phi node that copies the value out of the loop. */
5489 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5490 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5491 break;
5493 if (!phi)
5495 /* Create such a phi node. */
5496 tree new_name = duplicate_ssa_name (use, NULL);
5498 phi = create_phi_node (new_name, exit->dest);
5499 SSA_NAME_DEF_STMT (new_name) = phi;
5500 add_phi_arg (phi, use, exit);
5503 SET_USE (op_p, PHI_RESULT (phi));
5506 /* Ensure that operands of STMT may be used at the end of EXIT without
5507 violating loop closed ssa form. */
5509 static void
5510 protect_loop_closed_ssa_form (edge exit, tree stmt)
5512 ssa_op_iter iter;
5513 use_operand_p use_p;
5515 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5516 protect_loop_closed_ssa_form_use (exit, use_p);
5519 /* STMTS compute the value of the phi argument OP on edge EXIT of a loop.
5520 Arrange things so that they are emitted in the correct place, and so that
5521 loop closed ssa form is preserved. */
5523 void
5524 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5526 tree_stmt_iterator tsi;
5527 block_stmt_iterator bsi;
5528 tree phi, stmt, def, next;
5530 if (!single_pred_p (exit->dest))
5531 split_loop_exit_edge (exit);
5533 /* Ensure there is a label in exit->dest, so that we can
5534 insert after it. */
5535 tree_block_label (exit->dest);
5536 bsi = bsi_after_labels (exit->dest);
5538 if (TREE_CODE (stmts) == STATEMENT_LIST)
5540 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5542 tree stmt = tsi_stmt (tsi);
5543 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5544 protect_loop_closed_ssa_form (exit, stmt);
5547 else
5549 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5550 protect_loop_closed_ssa_form (exit, stmts);
5553 if (!op)
5554 return;
5556 for (phi = phi_nodes (exit->dest); phi; phi = next)
5558 next = PHI_CHAIN (phi);
5560 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5562 def = PHI_RESULT (phi);
5563 remove_statement (phi, false);
5564 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5565 def, op);
5566 SSA_NAME_DEF_STMT (def) = stmt;
5567 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5572 /* Rewrites USE using candidate CAND. */
5574 static void
5575 rewrite_use (struct ivopts_data *data,
5576 struct iv_use *use, struct iv_cand *cand)
5578 switch (use->type)
5580 case USE_NONLINEAR_EXPR:
5581 rewrite_use_nonlinear_expr (data, use, cand);
5582 break;
5584 case USE_ADDRESS:
5585 rewrite_use_address (data, use, cand);
5586 break;
5588 case USE_COMPARE:
5589 rewrite_use_compare (data, use, cand);
5590 break;
5592 default:
5593 gcc_unreachable ();
5595 mark_new_vars_to_rename (use->stmt);
5598 /* Rewrite the uses using the selected induction variables. */
5600 static void
5601 rewrite_uses (struct ivopts_data *data)
5603 unsigned i;
5604 struct iv_cand *cand;
5605 struct iv_use *use;
5607 for (i = 0; i < n_iv_uses (data); i++)
5609 use = iv_use (data, i);
5610 cand = use->selected;
5611 gcc_assert (cand);
5613 rewrite_use (data, use, cand);
5617 /* Removes the ivs that are not used after rewriting. */
5619 static void
5620 remove_unused_ivs (struct ivopts_data *data)
5622 unsigned j;
5623 bitmap_iterator bi;
5625 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5627 struct version_info *info;
5629 info = ver_info (data, j);
5630 if (info->iv
5631 && !zero_p (info->iv->step)
5632 && !info->inv_id
5633 && !info->iv->have_use_for
5634 && !info->preserve_biv)
5635 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5639 /* Frees data allocated by the optimization of a single loop. */
5641 static void
5642 free_loop_data (struct ivopts_data *data)
5644 unsigned i, j;
5645 bitmap_iterator bi;
5646 tree obj;
5648 htab_empty (data->niters);
5650 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5652 struct version_info *info;
5654 info = ver_info (data, i);
5655 if (info->iv)
5656 free (info->iv);
5657 info->iv = NULL;
5658 info->has_nonlin_use = false;
5659 info->preserve_biv = false;
5660 info->inv_id = 0;
5662 bitmap_clear (data->relevant);
5663 bitmap_clear (data->important_candidates);
5665 for (i = 0; i < n_iv_uses (data); i++)
5667 struct iv_use *use = iv_use (data, i);
5669 free (use->iv);
5670 BITMAP_FREE (use->related_cands);
5671 for (j = 0; j < use->n_map_members; j++)
5672 if (use->cost_map[j].depends_on)
5673 BITMAP_FREE (use->cost_map[j].depends_on);
5674 free (use->cost_map);
5675 free (use);
5677 VEC_truncate (iv_use_p, data->iv_uses, 0);
5679 for (i = 0; i < n_iv_cands (data); i++)
5681 struct iv_cand *cand = iv_cand (data, i);
5683 if (cand->iv)
5684 free (cand->iv);
5685 if (cand->depends_on)
5686 BITMAP_FREE (cand->depends_on);
5687 free (cand);
5689 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5691 if (data->version_info_size < num_ssa_names)
5693 data->version_info_size = 2 * num_ssa_names;
5694 free (data->version_info);
5695 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5698 data->max_inv_id = 0;
5700 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5701 SET_DECL_RTL (obj, NULL_RTX);
5703 VEC_truncate (tree, decl_rtl_to_reset, 0);
5706 /* Finalizes the data structures used by the iv optimization pass. */
5709 static void
5710 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5712 free_loop_data (data);
5713 free (data->version_info);
5714 BITMAP_FREE (data->relevant);
5715 BITMAP_FREE (data->important_candidates);
5716 htab_delete (data->niters);
5718 VEC_free (tree, heap, decl_rtl_to_reset);
5719 VEC_free (iv_use_p, heap, data->iv_uses);
5720 VEC_free (iv_cand_p, heap, data->iv_candidates);
5723 /* Optimizes LOOP. Returns true if anything changed. */
5725 static bool
5726 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5728 bool changed = false;
5729 struct iv_ca *iv_ca;
5730 edge exit;
5732 data->current_loop = loop;
5734 if (dump_file && (dump_flags & TDF_DETAILS))
5736 fprintf (dump_file, "Processing loop %d\n", loop->num);
5738 exit = single_dom_exit (loop);
5739 if (exit)
5741 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5742 exit->src->index, exit->dest->index);
5743 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5744 fprintf (dump_file, "\n");
5747 fprintf (dump_file, "\n");
5750 /* For each ssa name, determine whether it behaves as an induction variable
5751 in some loop. */
5752 if (!find_induction_variables (data))
5753 goto finish;
5755 /* Finds interesting uses (item 1). */
5756 find_interesting_uses (data);
5757 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5758 goto finish;
5760 /* Finds candidates for the induction variables (item 2). */
5761 find_iv_candidates (data);
5763 /* Calculates the costs (item 3, part 1). */
5764 determine_use_iv_costs (data);
5765 determine_iv_costs (data);
5766 determine_set_costs (data);
5768 /* Find the optimal set of induction variables (item 3, part 2). */
5769 iv_ca = find_optimal_iv_set (data);
5770 if (!iv_ca)
5771 goto finish;
5772 changed = true;
5774 /* Create the new induction variables (item 4, part 1). */
5775 create_new_ivs (data, iv_ca);
5776 iv_ca_free (&iv_ca);
5778 /* Rewrite the uses (item 4, part 2). */
5779 rewrite_uses (data);
5781 /* Remove the ivs that are unused after rewriting. */
5782 remove_unused_ivs (data);
5784 /* We have changed the structure of induction variables; it might happen
5785 that definitions in the scev database refer to some of them that were
5786 eliminated. */
5787 scev_reset ();
5789 finish:
5790 free_loop_data (data);
5792 return changed;
5795 /* Main entry point. Optimizes induction variables in LOOPS. */
5797 void
5798 tree_ssa_iv_optimize (struct loops *loops)
5800 struct loop *loop;
5801 struct ivopts_data data;
5803 tree_ssa_iv_optimize_init (&data);
5805 /* Optimize the loops starting with the innermost ones. */
5806 loop = loops->tree_root;
5807 while (loop->inner)
5808 loop = loop->inner;
5810 /* Scan the loops, inner ones first. */
5811 while (loop != loops->tree_root)
5813 if (dump_file && (dump_flags & TDF_DETAILS))
5814 flow_loop_dump (loop, dump_file, NULL, 1);
5816 tree_ssa_iv_optimize_loop (&data, loop);
5818 if (loop->next)
5820 loop = loop->next;
5821 while (loop->inner)
5822 loop = loop->inner;
5824 else
5825 loop = loop->outer;
5828 tree_ssa_iv_optimize_finalize (&data);
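/* Illustrative sketch, not part of the pass: the walk above visits the
   loop tree innermost-first -- dive to the deepest child, process it, move
   to the next sibling (diving again), and climb to the parent only once all
   its children are done; the root itself is never visited.  The same
   traversal over a hypothetical tree type:  */

struct toy_loop { struct toy_loop *inner, *next, *outer; };

static void
toy_visit_inner_first (struct toy_loop *root,
		       void (*visit) (struct toy_loop *))
{
  struct toy_loop *loop = root;

  while (loop->inner)
    loop = loop->inner;		/* Start at the deepest loop.  */

  while (loop != root)
    {
      visit (loop);
      if (loop->next)
	{
	  loop = loop->next;	/* Sibling: dive to its deepest child.  */
	  while (loop->inner)
	    loop = loop->inner;
	}
      else
	loop = loop->outer;	/* No sibling left: climb out.  */
    }
}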