gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92
  93 /* The infinite cost.  */
  94 #define INFTY 10000000
  95
  96 /* The expected number of loop iterations.  TODO -- use profiling instead of
  97    this.  */
  98 #define AVG_LOOP_NITER(LOOP) 5
  99
 100
 101 /* Representation of the induction variable.  */
 102 struct iv
 103 {
 104   tree base;            /* Initial value of the iv.  */
 105   tree base_object;     /* A memory object to that the induction variable points.  */
 106   tree step;            /* Step of the iv (constant only).  */
 107   tree ssa_name;        /* The ssa name with the value.  */
 108   bool biv_p;           /* Is it a biv?  */
 109   bool have_use_for;    /* Do we already have a use for it?  */
 110   unsigned use_id;      /* The identifier in the use if it is the case.  */
 111 };
 112
 113 /* Per-ssa version information (induction variable descriptions, etc.).  */
 114 struct version_info
 115 {
 116   tree name;            /* The ssa name.  */
 117   struct iv *iv;        /* Induction variable description.  */
 118   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 119                            an expression that is not an induction variable.  */
 120   unsigned inv_id;      /* Id of an invariant.  */
 121   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 122 };
 123
 124 /* Information attached to loop.  */
 125 struct loop_data
 126 {
 127   unsigned regs_used;   /* Number of registers used.  */
 128 };
 129
 130 /* Types of uses.  */
 131 enum use_type
 132 {
 133   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 134   USE_OUTER,            /* The induction variable is used outside the loop.  */
 135   USE_ADDRESS,          /* Use in an address.  */
 136   USE_COMPARE           /* Use is a compare.  */
 137 };
 138
 139 /* The candidate - cost pair.  */
 140 struct cost_pair
 141 {
 142   struct iv_cand *cand; /* The candidate.  */
 143   unsigned cost;        /* The cost.  */
 144   bitmap depends_on;    /* The list of invariants that have to be
 145                            preserved.  */
 146   tree value;           /* For final value elimination, the expression for
 147                            the final value of the iv.  For iv elimination,
 148                            the new bound to compare with.  */
 149 };
 150
 151 /* Use.  */
 152 struct iv_use
 153 {
 154   unsigned id;          /* The id of the use.  */
 155   enum use_type type;   /* Type of the use.  */
 156   struct iv *iv;        /* The induction variable it is based on.  */
 157   tree stmt;            /* Statement in that it occurs.  */
 158   tree *op_p;           /* The place where it occurs.  */
 159   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 160                            important ones.  */
 161
 162   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 163   struct cost_pair *cost_map;
 164                         /* The costs wrto the iv candidates.  */
 165
 166   struct iv_cand *selected;
 167                         /* The selected candidate.  */
 168 };
 169
 170 /* The position where the iv is computed.  */
 171 enum iv_position
 172 {
 173   IP_NORMAL,            /* At the end, just before the exit condition.  */
 174   IP_END,               /* At the end of the latch block.  */
 175   IP_ORIGINAL           /* The original biv.  */
 176 };
 177
 178 /* The induction variable candidate.  */
 179 struct iv_cand
 180 {
 181   unsigned id;          /* The number of the candidate.  */
 182   bool important;       /* Whether this is an "important" candidate, i.e. such
 183                            that it should be considered by all uses.  */
 184   enum iv_position pos; /* Where it is computed.  */
 185   tree incremented_at;  /* For original biv, the statement where it is
 186                            incremented.  */
 187   tree var_before;      /* The variable used for it before increment.  */
 188   tree var_after;       /* The variable used for it after increment.  */
 189   struct iv *iv;        /* The value of the candidate.  NULL for
 190                            "pseudocandidate" used to indicate the possibility
 191                            to replace the final value of an iv by direct
 192                            computation of the value.  */
 193   unsigned cost;        /* Cost of the candidate.  */
 194   bitmap depends_on;    /* The list of invariants that are used in step of the
 195                            biv.  */
 196 };
 197
 198 /* The data used by the induction variable optimizations.  */
 199
 200 typedef struct iv_use *iv_use_p;
 201 DEF_VEC_P(iv_use_p);
 202 DEF_VEC_ALLOC_P(iv_use_p,heap);
 203
 204 typedef struct iv_cand *iv_cand_p;
 205 DEF_VEC_P(iv_cand_p);
 206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 207
 208 struct ivopts_data
 209 {
 210   /* The currently optimized loop.  */
 211   struct loop *current_loop;
 212
 213   /* Numbers of iterations for all exits of the current loop.  */
 214   htab_t niters;
 215
 216   /* The size of version_info array allocated.  */
 217   unsigned version_info_size;
 218
 219   /* The array of information for the ssa names.  */
 220   struct version_info *version_info;
 221
 222   /* The bitmap of indices in version_info whose value was changed.  */
 223   bitmap relevant;
 224
 225   /* The maximum invariant id.  */
 226   unsigned max_inv_id;
 227
 228   /* The uses of induction variables.  */
 229   VEC(iv_use_p,heap) *iv_uses;
 230
 231   /* The candidates.  */
 232   VEC(iv_cand_p,heap) *iv_candidates;
 233
 234   /* A bitmap of important candidates.  */
 235   bitmap important_candidates;
 236
 237   /* Whether to consider just related and important candidates when replacing a
 238      use.  */
 239   bool consider_all_candidates;
 240 };
 241
 242 /* An assignment of iv candidates to uses.  */
 243
 244 struct iv_ca
 245 {
 246   /* The number of uses covered by the assignment.  */
 247   unsigned upto;
 248
 249   /* Number of uses that cannot be expressed by the candidates in the set.  */
 250   unsigned bad_uses;
 251
 252   /* Candidate assigned to a use, together with the related costs.  */
 253   struct cost_pair **cand_for_use;
 254
 255   /* Number of times each candidate is used.  */
 256   unsigned *n_cand_uses;
 257
 258   /* The candidates used.  */
 259   bitmap cands;
 260
 261   /* The number of candidates in the set.  */
 262   unsigned n_cands;
 263
 264   /* Total number of registers needed.  */
 265   unsigned n_regs;
 266
 267   /* Total cost of expressing uses.  */
 268   unsigned cand_use_cost;
 269
 270   /* Total cost of candidates.  */
 271   unsigned cand_cost;
 272
 273   /* Number of times each invariant is used.  */
 274   unsigned *n_invariant_uses;
 275
 276   /* Total cost of the assignment.  */
 277   unsigned cost;
 278 };
 279
 280 /* Difference of two iv candidate assignments.  */
 281
 282 struct iv_ca_delta
 283 {
 284   /* Changed use.  */
 285   struct iv_use *use;
 286
 287   /* An old assignment (for rollback purposes).  */
 288   struct cost_pair *old_cp;
 289
 290   /* A new assignment.  */
 291   struct cost_pair *new_cp;
 292
 293   /* Next change in the list.  */
 294   struct iv_ca_delta *next_change;
 295 };
 296
 297 /* Bound on number of candidates below that all candidates are considered.  */
 298
 299 #define CONSIDER_ALL_CANDIDATES_BOUND \
 300   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 301
 302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 303    optimizing such a loop would help, and it would take ages).  */
 304
 305 #define MAX_CONSIDERED_USES \
 306   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 307
 308 /* If there are at most this number of ivs in the set, try removing unnecessary
 309    ivs from the set always.  */
 310
 311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 312   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 313
 314 /* The list of trees for that the decl_rtl field must be reset is stored
 315    here.  */
 316
 317 static VEC(tree,heap) *decl_rtl_to_reset;
 318
 319 /* Number of uses recorded in DATA.  */
 320
 321 static inline unsigned
 322 n_iv_uses (struct ivopts_data *data)
 323 {
 324   return VEC_length (iv_use_p, data->iv_uses);
 325 }
 326
 327 /* Ith use recorded in DATA.  */
 328
 329 static inline struct iv_use *
 330 iv_use (struct ivopts_data *data, unsigned i)
 331 {
 332   return VEC_index (iv_use_p, data->iv_uses, i);
 333 }
 334
 335 /* Number of candidates recorded in DATA.  */
 336
 337 static inline unsigned
 338 n_iv_cands (struct ivopts_data *data)
 339 {
 340   return VEC_length (iv_cand_p, data->iv_candidates);
 341 }
 342
 343 /* Ith candidate recorded in DATA.  */
 344
 345 static inline struct iv_cand *
 346 iv_cand (struct ivopts_data *data, unsigned i)
 347 {
 348   return VEC_index (iv_cand_p, data->iv_candidates, i);
 349 }
 350
 351 /* The data for LOOP.  */
 352
 353 static inline struct loop_data *
 354 loop_data (struct loop *loop)
 355 {
 356   return loop->aux;
 357 }
 358
 359 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 360
 361 static edge
 362 single_dom_exit (struct loop *loop)
 363 {
 364   edge exit = loop->single_exit;
 365
 366   if (!exit)
 367     return NULL;
 368
 369   if (!just_once_each_iteration_p (loop, exit->src))
 370     return NULL;
 371
 372   return exit;
 373 }
 374
 375 /* Dumps information about the induction variable IV to FILE.  */
 376
 377 extern void dump_iv (FILE *, struct iv *);
 378 void
 379 dump_iv (FILE *file, struct iv *iv)
 380 {
 381   if (iv->ssa_name)
 382     {
 383       fprintf (file, "ssa name ");
 384       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 385       fprintf (file, "\n");
 386     }
 387
 388   fprintf (file, "  type ");
 389   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 390   fprintf (file, "\n");
 391
 392   if (iv->step)
 393     {
 394       fprintf (file, "  base ");
 395       print_generic_expr (file, iv->base, TDF_SLIM);
 396       fprintf (file, "\n");
 397
 398       fprintf (file, "  step ");
 399       print_generic_expr (file, iv->step, TDF_SLIM);
 400       fprintf (file, "\n");
 401     }
 402   else
 403     {
 404       fprintf (file, "  invariant ");
 405       print_generic_expr (file, iv->base, TDF_SLIM);
 406       fprintf (file, "\n");
 407     }
 408
 409   if (iv->base_object)
 410     {
 411       fprintf (file, "  base object ");
 412       print_generic_expr (file, iv->base_object, TDF_SLIM);
 413       fprintf (file, "\n");
 414     }
 415
 416   if (iv->biv_p)
 417     fprintf (file, "  is a biv\n");
 418 }
 419
 420 /* Dumps information about the USE to FILE.  */
 421
 422 extern void dump_use (FILE *, struct iv_use *);
 423 void
 424 dump_use (FILE *file, struct iv_use *use)
 425 {
 426   fprintf (file, "use %d\n", use->id);
 427
 428   switch (use->type)
 429     {
 430     case USE_NONLINEAR_EXPR:
 431       fprintf (file, "  generic\n");
 432       break;
 433
 434     case USE_OUTER:
 435       fprintf (file, "  outside\n");
 436       break;
 437
 438     case USE_ADDRESS:
 439       fprintf (file, "  address\n");
 440       break;
 441
 442     case USE_COMPARE:
 443       fprintf (file, "  compare\n");
 444       break;
 445
 446     default:
 447       gcc_unreachable ();
 448     }
 449
 450   fprintf (file, "  in statement ");
 451   print_generic_expr (file, use->stmt, TDF_SLIM);
 452   fprintf (file, "\n");
 453
 454   fprintf (file, "  at position ");
 455   if (use->op_p)
 456     print_generic_expr (file, *use->op_p, TDF_SLIM);
 457   fprintf (file, "\n");
 458
 459   dump_iv (file, use->iv);
 460
 461   if (use->related_cands)
 462     {
 463       fprintf (file, "  related candidates ");
 464       dump_bitmap (file, use->related_cands);
 465     }
 466 }
 467
 468 /* Dumps information about the uses to FILE.  */
 469
 470 extern void dump_uses (FILE *, struct ivopts_data *);
 471 void
 472 dump_uses (FILE *file, struct ivopts_data *data)
 473 {
 474   unsigned i;
 475   struct iv_use *use;
 476
 477   for (i = 0; i < n_iv_uses (data); i++)
 478     {
 479       use = iv_use (data, i);
 480
 481       dump_use (file, use);
 482       fprintf (file, "\n");
 483     }
 484 }
 485
 486 /* Dumps information about induction variable candidate CAND to FILE.  */
 487
 488 extern void dump_cand (FILE *, struct iv_cand *);
 489 void
 490 dump_cand (FILE *file, struct iv_cand *cand)
 491 {
 492   struct iv *iv = cand->iv;
 493
 494   fprintf (file, "candidate %d%s\n",
 495            cand->id, cand->important ? " (important)" : "");
 496
 497   if (cand->depends_on)
 498     {
 499       fprintf (file, "  depends on ");
 500       dump_bitmap (file, cand->depends_on);
 501     }
 502
 503   if (!iv)
 504     {
 505       fprintf (file, "  final value replacement\n");
 506       return;
 507     }
 508
 509   switch (cand->pos)
 510     {
 511     case IP_NORMAL:
 512       fprintf (file, "  incremented before exit test\n");
 513       break;
 514
 515     case IP_END:
 516       fprintf (file, "  incremented at end\n");
 517       break;
 518
 519     case IP_ORIGINAL:
 520       fprintf (file, "  original biv\n");
 521       break;
 522     }
 523
 524   dump_iv (file, iv);
 525 }
 526
 527 /* Returns the info for ssa version VER.  */
 528
 529 static inline struct version_info *
 530 ver_info (struct ivopts_data *data, unsigned ver)
 531 {
 532   return data->version_info + ver;
 533 }
 534
 535 /* Returns the info for ssa name NAME.  */
 536
 537 static inline struct version_info *
 538 name_info (struct ivopts_data *data, tree name)
 539 {
 540   return ver_info (data, SSA_NAME_VERSION (name));
 541 }
 542
 543 /* Checks whether there exists number X such that X * B = A, counting modulo
 544    2^BITS.  */
 545
 546 static bool
 547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
 548         HOST_WIDE_INT *x)
 549 {
 550   unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
 551   unsigned HOST_WIDE_INT inv, ex, val;
 552   unsigned i;
 553
 554   a &= mask;
 555   b &= mask;
 556
 557   /* First divide the whole equation by 2 as long as possible.  */
 558   while (!(a & 1) && !(b & 1))
 559     {
 560       a >>= 1;
 561       b >>= 1;
 562       bits--;
 563       mask >>= 1;
 564     }
 565
 566   if (!(b & 1))
 567     {
 568       /* If b is still even, a is odd and there is no such x.  */
 569       return false;
 570     }
 571
 572   /* Find the inverse of b.  We compute it as
 573      b^(2^(bits - 1) - 1) (mod 2^bits).  */
 574   inv = 1;
 575   ex = b;
 576   for (i = 0; i < bits - 1; i++)
 577     {
 578       inv = (inv * ex) & mask;
 579       ex = (ex * ex) & mask;
 580     }
 581
 582   val = (a * inv) & mask;
 583
 584   gcc_assert (((val * b) & mask) == a);
 585
 586   if ((val >> (bits - 1)) & 1)
 587     val |= ~mask;
 588
 589   *x = val;
 590
 591   return true;
 592 }
 593
 594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 595    emitted in LOOP.  */
 596
 597 static bool
 598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 599 {
 600   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 601
 602   gcc_assert (bb);
 603
 604   if (sbb == loop->latch)
 605     return true;
 606
 607   if (sbb != bb)
 608     return false;
 609
 610   return stmt == last_stmt (bb);
 611 }
 612
 613 /* Returns true if STMT if after the place where the original induction
 614    variable CAND is incremented.  */
 615
 616 static bool
 617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 618 {
 619   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 620   basic_block stmt_bb = bb_for_stmt (stmt);
 621   block_stmt_iterator bsi;
 622
 623   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 624     return false;
 625
 626   if (stmt_bb != cand_bb)
 627     return true;
 628
 629   /* Scan the block from the end, since the original ivs are usually
 630      incremented at the end of the loop body.  */
 631   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 632     {
 633       if (bsi_stmt (bsi) == cand->incremented_at)
 634         return false;
 635       if (bsi_stmt (bsi) == stmt)
 636         return true;
 637     }
 638 }
 639
 640 /* Returns true if STMT if after the place where the induction variable
 641    CAND is incremented in LOOP.  */
 642
 643 static bool
 644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 645 {
 646   switch (cand->pos)
 647     {
 648     case IP_END:
 649       return false;
 650
 651     case IP_NORMAL:
 652       return stmt_after_ip_normal_pos (loop, stmt);
 653
 654     case IP_ORIGINAL:
 655       return stmt_after_ip_original_pos (cand, stmt);
 656
 657     default:
 658       gcc_unreachable ();
 659     }
 660 }
 661
 662 /* Element of the table in that we cache the numbers of iterations obtained
 663    from exits of the loop.  */
 664
 665 struct nfe_cache_elt
 666 {
 667   /* The edge for that the number of iterations is cached.  */
 668   edge exit;
 669
 670   /* True if the # of iterations was successfully determined.  */
 671   bool valid_p;
 672
 673   /* Description of # of iterations.  */
 674   struct tree_niter_desc niter;
 675 };
 676
 677 /* Hash function for nfe_cache_elt E.  */
 678
 679 static hashval_t
 680 nfe_hash (const void *e)
 681 {
 682   const struct nfe_cache_elt *elt = e;
 683
 684   return htab_hash_pointer (elt->exit);
 685 }
 686
 687 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 688
 689 static int
 690 nfe_eq (const void *e1, const void *e2)
 691 {
 692   const struct nfe_cache_elt *elt1 = e1;
 693
 694   return elt1->exit == e2;
 695 }
 696
 697 /*  Returns structure describing number of iterations determined from
 698     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 699
 700 static struct tree_niter_desc *
 701 niter_for_exit (struct ivopts_data *data, edge exit)
 702 {
 703   struct nfe_cache_elt *nfe_desc;
 704   PTR *slot;
 705
 706   slot = htab_find_slot_with_hash (data->niters, exit,
 707                                    htab_hash_pointer (exit),
 708                                    INSERT);
 709
 710   if (!*slot)
 711     {
 712       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 713       nfe_desc->exit = exit;
 714       nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
 715                                                      exit, &nfe_desc->niter);
 716       *slot = nfe_desc;
 717     }
 718   else
 719     nfe_desc = *slot;
 720
 721   if (!nfe_desc->valid_p)
 722     return NULL;
 723
 724   return &nfe_desc->niter;
 725 }
 726
 727 /* Returns structure describing number of iterations determined from
 728    single dominating exit of DATA->current_loop, or NULL if something
 729    goes wrong.  */
 730
 731 static struct tree_niter_desc *
 732 niter_for_single_dom_exit (struct ivopts_data *data)
 733 {
 734   edge exit = single_dom_exit (data->current_loop);
 735
 736   if (!exit)
 737     return NULL;
 738
 739   return niter_for_exit (data, exit);
 740 }
 741
 742 /* Initializes data structures used by the iv optimization pass, stored
 743    in DATA.  LOOPS is the loop tree.  */
 744
 745 static void
 746 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
 747 {
 748   unsigned i;
 749
 750   data->version_info_size = 2 * num_ssa_names;
 751   data->version_info = xcalloc (data->version_info_size,
 752                                 sizeof (struct version_info));
 753   data->relevant = BITMAP_ALLOC (NULL);
 754   data->important_candidates = BITMAP_ALLOC (NULL);
 755   data->max_inv_id = 0;
 756   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 757
 758   for (i = 1; i < loops->num; i++)
 759     if (loops->parray[i])
 760       loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
 761
 762   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 763   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 764   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 765 }
 766
 767 /* Returns a memory object to that EXPR points.  In case we are able to
 768    determine that it does not point to any such object, NULL is returned.  */
 769
 770 static tree
 771 determine_base_object (tree expr)
 772 {
 773   enum tree_code code = TREE_CODE (expr);
 774   tree base, obj, op0, op1;
 775
 776   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 777     return NULL_TREE;
 778
 779   switch (code)
 780     {
 781     case INTEGER_CST:
 782       return NULL_TREE;
 783
 784     case ADDR_EXPR:
 785       obj = TREE_OPERAND (expr, 0);
 786       base = get_base_address (obj);
 787
 788       if (!base)
 789         return expr;
 790
 791       if (TREE_CODE (base) == INDIRECT_REF)
 792         return determine_base_object (TREE_OPERAND (base, 0));
 793
 794       return fold_convert (ptr_type_node,
 795                            build_fold_addr_expr (base));
 796
 797     case PLUS_EXPR:
 798     case MINUS_EXPR:
 799       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 800       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 801
 802       if (!op1)
 803         return op0;
 804
 805       if (!op0)
 806         return (code == PLUS_EXPR
 807                 ? op1
 808                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 809
 810       return fold_build2 (code, ptr_type_node, op0, op1);
 811
 812     case NOP_EXPR:
 813     case CONVERT_EXPR:
 814       return determine_base_object (TREE_OPERAND (expr, 0));
 815
 816     default:
 817       return fold_convert (ptr_type_node, expr);
 818     }
 819 }
 820
 821 /* Allocates an induction variable with given initial value BASE and step STEP
 822    for loop LOOP.  */
 823
 824 static struct iv *
 825 alloc_iv (tree base, tree step)
 826 {
 827   struct iv *iv = xcalloc (1, sizeof (struct iv));
 828
 829   if (step && integer_zerop (step))
 830     step = NULL_TREE;
 831
 832   iv->base = base;
 833   iv->base_object = determine_base_object (base);
 834   iv->step = step;
 835   iv->biv_p = false;
 836   iv->have_use_for = false;
 837   iv->use_id = 0;
 838   iv->ssa_name = NULL_TREE;
 839
 840   return iv;
 841 }
 842
 843 /* Sets STEP and BASE for induction variable IV.  */
 844
 845 static void
 846 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 847 {
 848   struct version_info *info = name_info (data, iv);
 849
 850   gcc_assert (!info->iv);
 851
 852   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 853   info->iv = alloc_iv (base, step);
 854   info->iv->ssa_name = iv;
 855 }
 856
 857 /* Finds induction variable declaration for VAR.  */
 858
 859 static struct iv *
 860 get_iv (struct ivopts_data *data, tree var)
 861 {
 862   basic_block bb;
 863
 864   if (!name_info (data, var)->iv)
 865     {
 866       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 867
 868       if (!bb
 869           || !flow_bb_inside_loop_p (data->current_loop, bb))
 870         set_iv (data, var, var, NULL_TREE);
 871     }
 872
 873   return name_info (data, var)->iv;
 874 }
 875
 876 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 877    not define a simple affine biv with nonzero step.  */
 878
 879 static tree
 880 determine_biv_step (tree phi)
 881 {
 882   struct loop *loop = bb_for_stmt (phi)->loop_father;
 883   tree name = PHI_RESULT (phi), base, step;
 884
 885   if (!is_gimple_reg (name))
 886     return NULL_TREE;
 887
 888   if (!simple_iv (loop, phi, name, &base, &step, true))
 889     return NULL_TREE;
 890
 891   if (zero_p (step))
 892     return NULL_TREE;
 893
 894   return step;
 895 }
 896
 897 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 898
 899 static bool
 900 abnormal_ssa_name_p (tree exp)
 901 {
 902   if (!exp)
 903     return false;
 904
 905   if (TREE_CODE (exp) != SSA_NAME)
 906     return false;
 907
 908   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 909 }
 910
 911 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 912    abnormal phi node.  Callback for for_each_index.  */
 913
 914 static bool
 915 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 916                                   void *data ATTRIBUTE_UNUSED)
 917 {
 918   if (TREE_CODE (base) == ARRAY_REF)
 919     {
 920       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 921         return false;
 922       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 923         return false;
 924     }
 925
 926   return !abnormal_ssa_name_p (*index);
 927 }
 928
 929 /* Returns true if EXPR contains a ssa name that occurs in an
 930    abnormal phi node.  */
 931
 932 static bool
 933 contains_abnormal_ssa_name_p (tree expr)
 934 {
 935   enum tree_code code;
 936   enum tree_code_class class;
 937
 938   if (!expr)
 939     return false;
 940
 941   code = TREE_CODE (expr);
 942   class = TREE_CODE_CLASS (code);
 943
 944   if (code == SSA_NAME)
 945     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 946
 947   if (code == INTEGER_CST
 948       || is_gimple_min_invariant (expr))
 949     return false;
 950
 951   if (code == ADDR_EXPR)
 952     return !for_each_index (&TREE_OPERAND (expr, 0),
 953                             idx_contains_abnormal_ssa_name_p,
 954                             NULL);
 955
 956   switch (class)
 957     {
 958     case tcc_binary:
 959     case tcc_comparison:
 960       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 961         return true;
 962
 963       /* Fallthru.  */
 964     case tcc_unary:
 965       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 966         return true;
 967
 968       break;
 969
 970     default:
 971       gcc_unreachable ();
 972     }
 973
 974   return false;
 975 }
 976
 977 /* Finds basic ivs.  */
 978
 979 static bool
 980 find_bivs (struct ivopts_data *data)
 981 {
 982   tree phi, step, type, base;
 983   bool found = false;
 984   struct loop *loop = data->current_loop;
 985
 986   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 987     {
 988       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 989         continue;
 990
 991       step = determine_biv_step (phi);
 992       if (!step)
 993         continue;
 994
 995       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 996       base = expand_simple_operations (base);
 997       if (contains_abnormal_ssa_name_p (base)
 998           || contains_abnormal_ssa_name_p (step))
 999         continue;
1000
1001       type = TREE_TYPE (PHI_RESULT (phi));
1002       base = fold_convert (type, base);
1003       if (step)
1004         step = fold_convert (type, step);
1005
1006       set_iv (data, PHI_RESULT (phi), base, step);
1007       found = true;
1008     }
1009
1010   return found;
1011 }
1012
1013 /* Marks basic ivs.  */
1014
1015 static void
1016 mark_bivs (struct ivopts_data *data)
1017 {
1018   tree phi, var;
1019   struct iv *iv, *incr_iv;
1020   struct loop *loop = data->current_loop;
1021   basic_block incr_bb;
1022
1023   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1024     {
1025       iv = get_iv (data, PHI_RESULT (phi));
1026       if (!iv)
1027         continue;
1028
1029       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1030       incr_iv = get_iv (data, var);
1031       if (!incr_iv)
1032         continue;
1033
1034       /* If the increment is in the subloop, ignore it.  */
1035       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1036       if (incr_bb->loop_father != data->current_loop
1037           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1038         continue;
1039
1040       iv->biv_p = true;
1041       incr_iv->biv_p = true;
1042     }
1043 }
1044
1045 /* Checks whether STMT defines a linear induction variable and stores its
1046    parameters to BASE and STEP.  */
1047
1048 static bool
1049 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1050                         tree *base, tree *step)
1051 {
1052   tree lhs;
1053   struct loop *loop = data->current_loop;
1054
1055   *base = NULL_TREE;
1056   *step = NULL_TREE;
1057
1058   if (TREE_CODE (stmt) != MODIFY_EXPR)
1059     return false;
1060
1061   lhs = TREE_OPERAND (stmt, 0);
1062   if (TREE_CODE (lhs) != SSA_NAME)
1063     return false;
1064
1065   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
1066     return false;
1067   *base = expand_simple_operations (*base);
1068
1069   if (contains_abnormal_ssa_name_p (*base)
1070       || contains_abnormal_ssa_name_p (*step))
1071     return false;
1072
1073   return true;
1074 }
1075
1076 /* Finds general ivs in statement STMT.  */
1077
1078 static void
1079 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1080 {
1081   tree base, step;
1082
1083   if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1084     return;
1085
1086   set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1087 }
1088
1089 /* Finds general ivs in basic block BB.  */
1090
1091 static void
1092 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1093 {
1094   block_stmt_iterator bsi;
1095
1096   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1097     find_givs_in_stmt (data, bsi_stmt (bsi));
1098 }
1099
1100 /* Finds general ivs.  */
1101
1102 static void
1103 find_givs (struct ivopts_data *data)
1104 {
1105   struct loop *loop = data->current_loop;
1106   basic_block *body = get_loop_body_in_dom_order (loop);
1107   unsigned i;
1108
1109   for (i = 0; i < loop->num_nodes; i++)
1110     find_givs_in_bb (data, body[i]);
1111   free (body);
1112 }
1113
1114 /* For each ssa name defined in LOOP determines whether it is an induction
1115    variable and if so, its initial value and step.  */
1116
1117 static bool
1118 find_induction_variables (struct ivopts_data *data)
1119 {
1120   unsigned i;
1121   bitmap_iterator bi;
1122
1123   if (!find_bivs (data))
1124     return false;
1125
1126   find_givs (data);
1127   mark_bivs (data);
1128
1129   if (dump_file && (dump_flags & TDF_DETAILS))
1130     {
1131       struct tree_niter_desc *niter;
1132
1133       niter = niter_for_single_dom_exit (data);
1134
1135       if (niter)
1136         {
1137           fprintf (dump_file, "  number of iterations ");
1138           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1139           fprintf (dump_file, "\n");
1140
1141           fprintf (dump_file, "  may be zero if ");
1142           print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1143           fprintf (dump_file, "\n");
1144           fprintf (dump_file, "\n");
1145         };
1146
1147       fprintf (dump_file, "Induction variables:\n\n");
1148
1149       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1150         {
1151           if (ver_info (data, i)->iv)
1152             dump_iv (dump_file, ver_info (data, i)->iv);
1153         }
1154     }
1155
1156   return true;
1157 }
1158
1159 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1160
1161 static struct iv_use *
1162 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1163             tree stmt, enum use_type use_type)
1164 {
1165   struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1166
1167   use->id = n_iv_uses (data);
1168   use->type = use_type;
1169   use->iv = iv;
1170   use->stmt = stmt;
1171   use->op_p = use_p;
1172   use->related_cands = BITMAP_ALLOC (NULL);
1173
1174   /* To avoid showing ssa name in the dumps, if it was not reset by the
1175      caller.  */
1176   iv->ssa_name = NULL_TREE;
1177
1178   if (dump_file && (dump_flags & TDF_DETAILS))
1179     dump_use (dump_file, use);
1180
1181   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1182
1183   return use;
1184 }
1185
1186 /* Checks whether OP is a loop-level invariant and if so, records it.
1187    NONLINEAR_USE is true if the invariant is used in a way we do not
1188    handle specially.  */
1189
1190 static void
1191 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1192 {
1193   basic_block bb;
1194   struct version_info *info;
1195
1196   if (TREE_CODE (op) != SSA_NAME
1197       || !is_gimple_reg (op))
1198     return;
1199
1200   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1201   if (bb
1202       && flow_bb_inside_loop_p (data->current_loop, bb))
1203     return;
1204
1205   info = name_info (data, op);
1206   info->name = op;
1207   info->has_nonlin_use |= nonlinear_use;
1208   if (!info->inv_id)
1209     info->inv_id = ++data->max_inv_id;
1210   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1211 }
1212
1213 /* Checks whether the use OP is interesting and if so, records it
1214    as TYPE.  */
1215
1216 static struct iv_use *
1217 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1218                                        enum use_type type)
1219 {
1220   struct iv *iv;
1221   struct iv *civ;
1222   tree stmt;
1223   struct iv_use *use;
1224
1225   if (TREE_CODE (op) != SSA_NAME)
1226     return NULL;
1227
1228   iv = get_iv (data, op);
1229   if (!iv)
1230     return NULL;
1231
1232   if (iv->have_use_for)
1233     {
1234       use = iv_use (data, iv->use_id);
1235
1236       gcc_assert (use->type == USE_NONLINEAR_EXPR
1237                   || use->type == USE_OUTER);
1238
1239       if (type == USE_NONLINEAR_EXPR)
1240         use->type = USE_NONLINEAR_EXPR;
1241       return use;
1242     }
1243
1244   if (zero_p (iv->step))
1245     {
1246       record_invariant (data, op, true);
1247       return NULL;
1248     }
1249   iv->have_use_for = true;
1250
1251   civ = xmalloc (sizeof (struct iv));
1252   *civ = *iv;
1253
1254   stmt = SSA_NAME_DEF_STMT (op);
1255   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1256               || TREE_CODE (stmt) == MODIFY_EXPR);
1257
1258   use = record_use (data, NULL, civ, stmt, type);
1259   iv->use_id = use->id;
1260
1261   return use;
1262 }
1263
1264 /* Checks whether the use OP is interesting and if so, records it.  */
1265
1266 static struct iv_use *
1267 find_interesting_uses_op (struct ivopts_data *data, tree op)
1268 {
1269   return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1270 }
1271
1272 /* Records a definition of induction variable OP that is used outside of the
1273    loop.  */
1274
1275 static struct iv_use *
1276 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1277 {
1278   return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1279 }
1280
1281 /* Checks whether the condition *COND_P in STMT is interesting
1282    and if so, records it.  */
1283
1284 static void
1285 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1286 {
1287   tree *op0_p;
1288   tree *op1_p;
1289   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1290   struct iv const_iv;
1291   tree zero = integer_zero_node;
1292
1293   const_iv.step = NULL_TREE;
1294
1295   if (TREE_CODE (*cond_p) != SSA_NAME
1296       && !COMPARISON_CLASS_P (*cond_p))
1297     return;
1298
1299   if (TREE_CODE (*cond_p) == SSA_NAME)
1300     {
1301       op0_p = cond_p;
1302       op1_p = &zero;
1303     }
1304   else
1305     {
1306       op0_p = &TREE_OPERAND (*cond_p, 0);
1307       op1_p = &TREE_OPERAND (*cond_p, 1);
1308     }
1309
1310   if (TREE_CODE (*op0_p) == SSA_NAME)
1311     iv0 = get_iv (data, *op0_p);
1312   else
1313     iv0 = &const_iv;
1314
1315   if (TREE_CODE (*op1_p) == SSA_NAME)
1316     iv1 = get_iv (data, *op1_p);
1317   else
1318     iv1 = &const_iv;
1319
1320   if (/* When comparing with non-invariant value, we may not do any senseful
1321          induction variable elimination.  */
1322       (!iv0 || !iv1)
1323       /* Eliminating condition based on two ivs would be nontrivial.
1324          ??? TODO -- it is not really important to handle this case.  */
1325       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1326     {
1327       find_interesting_uses_op (data, *op0_p);
1328       find_interesting_uses_op (data, *op1_p);
1329       return;
1330     }
1331
1332   if (zero_p (iv0->step) && zero_p (iv1->step))
1333     {
1334       /* If both are invariants, this is a work for unswitching.  */
1335       return;
1336     }
1337
1338   civ = xmalloc (sizeof (struct iv));
1339   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1340   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1341 }
1342
1343 /* Returns true if expression EXPR is obviously invariant in LOOP,
1344    i.e. if all its operands are defined outside of the LOOP.  */
1345
1346 bool
1347 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1348 {
1349   basic_block def_bb;
1350   unsigned i, len;
1351
1352   if (is_gimple_min_invariant (expr))
1353     return true;
1354
1355   if (TREE_CODE (expr) == SSA_NAME)
1356     {
1357       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1358       if (def_bb
1359           && flow_bb_inside_loop_p (loop, def_bb))
1360         return false;
1361
1362       return true;
1363     }
1364
1365   if (!EXPR_P (expr))
1366     return false;
1367
1368   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1369   for (i = 0; i < len; i++)
1370     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1371       return false;
1372
1373   return true;
1374 }
1375
1376 /* Cumulates the steps of indices into DATA and replaces their values with the
1377    initial ones.  Returns false when the value of the index cannot be determined.
1378    Callback for for_each_index.  */
1379
1380 struct ifs_ivopts_data
1381 {
1382   struct ivopts_data *ivopts_data;
1383   tree stmt;
1384   tree *step_p;
1385 };
1386
1387 static bool
1388 idx_find_step (tree base, tree *idx, void *data)
1389 {
1390   struct ifs_ivopts_data *dta = data;
1391   struct iv *iv;
1392   tree step, iv_step, lbound, off;
1393   struct loop *loop = dta->ivopts_data->current_loop;
1394
1395   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1396       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1397     return false;
1398
1399   /* If base is a component ref, require that the offset of the reference
1400      be invariant.  */
1401   if (TREE_CODE (base) == COMPONENT_REF)
1402     {
1403       off = component_ref_field_offset (base);
1404       return expr_invariant_in_loop_p (loop, off);
1405     }
1406
1407   /* If base is array, first check whether we will be able to move the
1408      reference out of the loop (in order to take its address in strength
1409      reduction).  In order for this to work we need both lower bound
1410      and step to be loop invariants.  */
1411   if (TREE_CODE (base) == ARRAY_REF)
1412     {
1413       step = array_ref_element_size (base);
1414       lbound = array_ref_low_bound (base);
1415
1416       if (!expr_invariant_in_loop_p (loop, step)
1417           || !expr_invariant_in_loop_p (loop, lbound))
1418         return false;
1419     }
1420
1421   if (TREE_CODE (*idx) != SSA_NAME)
1422     return true;
1423
1424   iv = get_iv (dta->ivopts_data, *idx);
1425   if (!iv)
1426     return false;
1427
1428   *idx = iv->base;
1429
1430   if (!iv->step)
1431     return true;
1432
1433   if (TREE_CODE (base) == ARRAY_REF)
1434     {
1435       step = array_ref_element_size (base);
1436
1437       /* We only handle addresses whose step is an integer constant.  */
1438       if (TREE_CODE (step) != INTEGER_CST)
1439         return false;
1440     }
1441   else
1442     /* The step for pointer arithmetics already is 1 byte.  */
1443     step = build_int_cst (sizetype, 1);
1444
1445   iv_step = convert_step (dta->ivopts_data->current_loop,
1446                           sizetype, iv->base, iv->step, dta->stmt);
1447
1448   if (!iv_step)
1449     {
1450       /* The index might wrap.  */
1451       return false;
1452     }
1453
1454   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1455
1456   if (!*dta->step_p)
1457     *dta->step_p = step;
1458   else
1459     *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1460
1461   return true;
1462 }
1463
1464 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1465    object is passed to it in DATA.  */
1466
1467 static bool
1468 idx_record_use (tree base, tree *idx,
1469                 void *data)
1470 {
1471   find_interesting_uses_op (data, *idx);
1472   if (TREE_CODE (base) == ARRAY_REF)
1473     {
1474       find_interesting_uses_op (data, array_ref_element_size (base));
1475       find_interesting_uses_op (data, array_ref_low_bound (base));
1476     }
1477   return true;
1478 }
1479
1480 /* Returns true if memory reference REF may be unaligned.  */
1481
1482 static bool
1483 may_be_unaligned_p (tree ref)
1484 {
1485   tree base;
1486   tree base_type;
1487   HOST_WIDE_INT bitsize;
1488   HOST_WIDE_INT bitpos;
1489   tree toffset;
1490   enum machine_mode mode;
1491   int unsignedp, volatilep;
1492   unsigned base_align;
1493
1494   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1495      thus they are not misaligned.  */
1496   if (TREE_CODE (ref) == TARGET_MEM_REF)
1497     return false;
1498
1499   /* The test below is basically copy of what expr.c:normal_inner_ref
1500      does to check whether the object must be loaded by parts when
1501      STRICT_ALIGNMENT is true.  */
1502   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1503                               &unsignedp, &volatilep, true);
1504   base_type = TREE_TYPE (base);
1505   base_align = TYPE_ALIGN (base_type);
1506
1507   if (mode != BLKmode
1508       && (base_align < GET_MODE_ALIGNMENT (mode)
1509           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1510           || bitpos % BITS_PER_UNIT != 0))
1511     return true;
1512
1513   return false;
1514 }
1515
1516 /* Finds addresses in *OP_P inside STMT.  */
1517
1518 static void
1519 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1520 {
1521   tree base = *op_p, step = NULL;
1522   struct iv *civ;
1523   struct ifs_ivopts_data ifs_ivopts_data;
1524
1525   /* Do not play with volatile memory references.  A bit too conservative,
1526      perhaps, but safe.  */
1527   if (stmt_ann (stmt)->has_volatile_ops)
1528     goto fail;
1529
1530   /* Ignore bitfields for now.  Not really something terribly complicated
1531      to handle.  TODO.  */
1532   if (TREE_CODE (base) == COMPONENT_REF
1533       && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1534     goto fail;
1535
1536   if (STRICT_ALIGNMENT
1537       && may_be_unaligned_p (base))
1538     goto fail;
1539
1540   base = unshare_expr (base);
1541
1542   if (TREE_CODE (base) == TARGET_MEM_REF)
1543     {
1544       tree type = build_pointer_type (TREE_TYPE (base));
1545       tree astep;
1546
1547       if (TMR_BASE (base)
1548           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1549         {
1550           civ = get_iv (data, TMR_BASE (base));
1551           if (!civ)
1552             goto fail;
1553
1554           TMR_BASE (base) = civ->base;
1555           step = civ->step;
1556         }
1557       if (TMR_INDEX (base)
1558           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1559         {
1560           civ = get_iv (data, TMR_INDEX (base));
1561           if (!civ)
1562             goto fail;
1563
1564           TMR_INDEX (base) = civ->base;
1565           astep = civ->step;
1566
1567           if (astep)
1568             {
1569               if (TMR_STEP (base))
1570                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1571
1572               if (step)
1573                 step = fold_build2 (PLUS_EXPR, type, step, astep);
1574               else
1575                 step = astep;
1576             }
1577         }
1578
1579       if (zero_p (step))
1580         goto fail;
1581       base = tree_mem_ref_addr (type, base);
1582     }
1583   else
1584     {
1585       ifs_ivopts_data.ivopts_data = data;
1586       ifs_ivopts_data.stmt = stmt;
1587       ifs_ivopts_data.step_p = &step;
1588       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1589           || zero_p (step))
1590         goto fail;
1591
1592       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1593       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1594
1595       base = build_fold_addr_expr (base);
1596     }
1597
1598   civ = alloc_iv (base, step);
1599   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1600   return;
1601
1602 fail:
1603   for_each_index (op_p, idx_record_use, data);
1604 }
1605
1606 /* Finds and records invariants used in STMT.  */
1607
1608 static void
1609 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1610 {
1611   ssa_op_iter iter;
1612   use_operand_p use_p;
1613   tree op;
1614
1615   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1616     {
1617       op = USE_FROM_PTR (use_p);
1618       record_invariant (data, op, false);
1619     }
1620 }
1621
1622 /* Finds interesting uses of induction variables in the statement STMT.  */
1623
1624 static void
1625 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1626 {
1627   struct iv *iv;
1628   tree op, lhs, rhs;
1629   ssa_op_iter iter;
1630   use_operand_p use_p;
1631
1632   find_invariants_stmt (data, stmt);
1633
1634   if (TREE_CODE (stmt) == COND_EXPR)
1635     {
1636       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1637       return;
1638     }
1639
1640   if (TREE_CODE (stmt) == MODIFY_EXPR)
1641     {
1642       lhs = TREE_OPERAND (stmt, 0);
1643       rhs = TREE_OPERAND (stmt, 1);
1644
1645       if (TREE_CODE (lhs) == SSA_NAME)
1646         {
1647           /* If the statement defines an induction variable, the uses are not
1648              interesting by themselves.  */
1649
1650           iv = get_iv (data, lhs);
1651
1652           if (iv && !zero_p (iv->step))
1653             return;
1654         }
1655
1656       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1657         {
1658         case tcc_comparison:
1659           find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1660           return;
1661
1662         case tcc_reference:
1663           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1664           if (REFERENCE_CLASS_P (lhs))
1665             find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1666           return;
1667
1668         default: ;
1669         }
1670
1671       if (REFERENCE_CLASS_P (lhs)
1672           && is_gimple_val (rhs))
1673         {
1674           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1675           find_interesting_uses_op (data, rhs);
1676           return;
1677         }
1678
1679       /* TODO -- we should also handle address uses of type
1680
1681          memory = call (whatever);
1682
1683          and
1684
1685          call (memory).  */
1686     }
1687
1688   if (TREE_CODE (stmt) == PHI_NODE
1689       && bb_for_stmt (stmt) == data->current_loop->header)
1690     {
1691       lhs = PHI_RESULT (stmt);
1692       iv = get_iv (data, lhs);
1693
1694       if (iv && !zero_p (iv->step))
1695         return;
1696     }
1697
1698   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1699     {
1700       op = USE_FROM_PTR (use_p);
1701
1702       if (TREE_CODE (op) != SSA_NAME)
1703         continue;
1704
1705       iv = get_iv (data, op);
1706       if (!iv)
1707         continue;
1708
1709       find_interesting_uses_op (data, op);
1710     }
1711 }
1712
1713 /* Finds interesting uses of induction variables outside of loops
1714    on loop exit edge EXIT.  */
1715
1716 static void
1717 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1718 {
1719   tree phi, def;
1720
1721   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1722     {
1723       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1724       find_interesting_uses_outer (data, def);
1725     }
1726 }
1727
1728 /* Finds uses of the induction variables that are interesting.  */
1729
1730 static void
1731 find_interesting_uses (struct ivopts_data *data)
1732 {
1733   basic_block bb;
1734   block_stmt_iterator bsi;
1735   tree phi;
1736   basic_block *body = get_loop_body (data->current_loop);
1737   unsigned i;
1738   struct version_info *info;
1739   edge e;
1740
1741   if (dump_file && (dump_flags & TDF_DETAILS))
1742     fprintf (dump_file, "Uses:\n\n");
1743
1744   for (i = 0; i < data->current_loop->num_nodes; i++)
1745     {
1746       edge_iterator ei;
1747       bb = body[i];
1748
1749       FOR_EACH_EDGE (e, ei, bb->succs)
1750         if (e->dest != EXIT_BLOCK_PTR
1751             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1752           find_interesting_uses_outside (data, e);
1753
1754       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1755         find_interesting_uses_stmt (data, phi);
1756       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1757         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1758     }
1759
1760   if (dump_file && (dump_flags & TDF_DETAILS))
1761     {
1762       bitmap_iterator bi;
1763
1764       fprintf (dump_file, "\n");
1765
1766       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1767         {
1768           info = ver_info (data, i);
1769           if (info->inv_id)
1770             {
1771               fprintf (dump_file, "  ");
1772               print_generic_expr (dump_file, info->name, TDF_SLIM);
1773               fprintf (dump_file, " is invariant (%d)%s\n",
1774                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1775             }
1776         }
1777
1778       fprintf (dump_file, "\n");
1779     }
1780
1781   free (body);
1782 }
1783
1784 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1785    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1786    we are at the top-level of the processed address.  */
1787
1788 static tree
1789 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1790                 unsigned HOST_WIDE_INT *offset)
1791 {
1792   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1793   enum tree_code code;
1794   tree type, orig_type = TREE_TYPE (expr);
1795   unsigned HOST_WIDE_INT off0, off1, st;
1796   tree orig_expr = expr;
1797
1798   STRIP_NOPS (expr);
1799
1800   type = TREE_TYPE (expr);
1801   code = TREE_CODE (expr);
1802   *offset = 0;
1803
1804   switch (code)
1805     {
1806     case INTEGER_CST:
1807       if (!cst_and_fits_in_hwi (expr)
1808           || zero_p (expr))
1809         return orig_expr;
1810
1811       *offset = int_cst_value (expr);
1812       return build_int_cst_type (orig_type, 0);
1813
1814     case PLUS_EXPR:
1815     case MINUS_EXPR:
1816       op0 = TREE_OPERAND (expr, 0);
1817       op1 = TREE_OPERAND (expr, 1);
1818
1819       op0 = strip_offset_1 (op0, false, false, &off0);
1820       op1 = strip_offset_1 (op1, false, false, &off1);
1821
1822       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1823       if (op0 == TREE_OPERAND (expr, 0)
1824           && op1 == TREE_OPERAND (expr, 1))
1825         return orig_expr;
1826
1827       if (zero_p (op1))
1828         expr = op0;
1829       else if (zero_p (op0))
1830         {
1831           if (code == PLUS_EXPR)
1832             expr = op1;
1833           else
1834             expr = fold_build1 (NEGATE_EXPR, type, op1);
1835         }
1836       else
1837         expr = fold_build2 (code, type, op0, op1);
1838
1839       return fold_convert (orig_type, expr);
1840
1841     case ARRAY_REF:
1842       if (!inside_addr)
1843         return orig_expr;
1844
1845       step = array_ref_element_size (expr);
1846       if (!cst_and_fits_in_hwi (step))
1847         break;
1848
1849       st = int_cst_value (step);
1850       op1 = TREE_OPERAND (expr, 1);
1851       op1 = strip_offset_1 (op1, false, false, &off1);
1852       *offset = off1 * st;
1853
1854       if (top_compref
1855           && zero_p (op1))
1856         {
1857           /* Strip the component reference completely.  */
1858           op0 = TREE_OPERAND (expr, 0);
1859           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1860           *offset += off0;
1861           return op0;
1862         }
1863       break;
1864
1865     case COMPONENT_REF:
1866       if (!inside_addr)
1867         return orig_expr;
1868
1869       tmp = component_ref_field_offset (expr);
1870       if (top_compref
1871           && cst_and_fits_in_hwi (tmp))
1872         {
1873           /* Strip the component reference completely.  */
1874           op0 = TREE_OPERAND (expr, 0);
1875           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1876           *offset = off0 + int_cst_value (tmp);
1877           return op0;
1878         }
1879       break;
1880
1881     case ADDR_EXPR:
1882       op0 = TREE_OPERAND (expr, 0);
1883       op0 = strip_offset_1 (op0, true, true, &off0);
1884       *offset += off0;
1885
1886       if (op0 == TREE_OPERAND (expr, 0))
1887         return orig_expr;
1888
1889       expr = build_fold_addr_expr (op0);
1890       return fold_convert (orig_type, expr);
1891
1892     case INDIRECT_REF:
1893       inside_addr = false;
1894       break;
1895
1896     default:
1897       return orig_expr;
1898     }
1899
1900   /* Default handling of expressions for that we want to recurse into
1901      the first operand.  */
1902   op0 = TREE_OPERAND (expr, 0);
1903   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1904   *offset += off0;
1905
1906   if (op0 == TREE_OPERAND (expr, 0)
1907       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1908     return orig_expr;
1909
1910   expr = copy_node (expr);
1911   TREE_OPERAND (expr, 0) = op0;
1912   if (op1)
1913     TREE_OPERAND (expr, 1) = op1;
1914
1915   /* Inside address, we might strip the top level component references,
1916      thus changing type of the expression.  Handling of ADDR_EXPR
1917      will fix that.  */
1918   expr = fold_convert (orig_type, expr);
1919
1920   return expr;
1921 }
1922
1923 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1924
1925 static tree
1926 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1927 {
1928   return strip_offset_1 (expr, false, false, offset);
1929 }
1930
1931 /* Returns variant of TYPE that can be used as base for different uses.
1932    For integer types, we return unsigned variant of the type, which
1933    avoids problems with overflows.  For pointer types, we return void *.  */
1934
1935 static tree
1936 generic_type_for (tree type)
1937 {
1938   if (POINTER_TYPE_P (type))
1939     return ptr_type_node;
1940
1941   if (TYPE_UNSIGNED (type))
1942     return type;
1943
1944   return unsigned_type_for (type);
1945 }
1946
1947 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1948    the bitmap to that we should store it.  */
1949
1950 static struct ivopts_data *fd_ivopts_data;
1951 static tree
1952 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1953 {
1954   bitmap *depends_on = data;
1955   struct version_info *info;
1956
1957   if (TREE_CODE (*expr_p) != SSA_NAME)
1958     return NULL_TREE;
1959   info = name_info (fd_ivopts_data, *expr_p);
1960
1961   if (!info->inv_id || info->has_nonlin_use)
1962     return NULL_TREE;
1963
1964   if (!*depends_on)
1965     *depends_on = BITMAP_ALLOC (NULL);
1966   bitmap_set_bit (*depends_on, info->inv_id);
1967
1968   return NULL_TREE;
1969 }
1970
1971 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1972    position to POS.  If USE is not NULL, the candidate is set as related to
1973    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1974    replacement of the final value of the iv by a direct computation.  */
1975
1976 static struct iv_cand *
1977 add_candidate_1 (struct ivopts_data *data,
1978                  tree base, tree step, bool important, enum iv_position pos,
1979                  struct iv_use *use, tree incremented_at)
1980 {
1981   unsigned i;
1982   struct iv_cand *cand = NULL;
1983   tree type, orig_type;
1984
1985   if (base)
1986     {
1987       orig_type = TREE_TYPE (base);
1988       type = generic_type_for (orig_type);
1989       if (type != orig_type)
1990         {
1991           base = fold_convert (type, base);
1992           if (step)
1993             step = fold_convert (type, step);
1994         }
1995     }
1996
1997   for (i = 0; i < n_iv_cands (data); i++)
1998     {
1999       cand = iv_cand (data, i);
2000
2001       if (cand->pos != pos)
2002         continue;
2003
2004       if (cand->incremented_at != incremented_at)
2005         continue;
2006
2007       if (!cand->iv)
2008         {
2009           if (!base && !step)
2010             break;
2011
2012           continue;
2013         }
2014
2015       if (!base && !step)
2016         continue;
2017
2018       if (!operand_equal_p (base, cand->iv->base, 0))
2019         continue;
2020
2021       if (zero_p (cand->iv->step))
2022         {
2023           if (zero_p (step))
2024             break;
2025         }
2026       else
2027         {
2028           if (step && operand_equal_p (step, cand->iv->step, 0))
2029             break;
2030         }
2031     }
2032
2033   if (i == n_iv_cands (data))
2034     {
2035       cand = xcalloc (1, sizeof (struct iv_cand));
2036       cand->id = i;
2037
2038       if (!base && !step)
2039         cand->iv = NULL;
2040       else
2041         cand->iv = alloc_iv (base, step);
2042
2043       cand->pos = pos;
2044       if (pos != IP_ORIGINAL && cand->iv)
2045         {
2046           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2047           cand->var_after = cand->var_before;
2048         }
2049       cand->important = important;
2050       cand->incremented_at = incremented_at;
2051       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2052
2053       if (step
2054           && TREE_CODE (step) != INTEGER_CST)
2055         {
2056           fd_ivopts_data = data;
2057           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2058         }
2059
2060       if (dump_file && (dump_flags & TDF_DETAILS))
2061         dump_cand (dump_file, cand);
2062     }
2063
2064   if (important && !cand->important)
2065     {
2066       cand->important = true;
2067       if (dump_file && (dump_flags & TDF_DETAILS))
2068         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2069     }
2070
2071   if (use)
2072     {
2073       bitmap_set_bit (use->related_cands, i);
2074       if (dump_file && (dump_flags & TDF_DETAILS))
2075         fprintf (dump_file, "Candidate %d is related to use %d\n",
2076                  cand->id, use->id);
2077     }
2078
2079   return cand;
2080 }
2081
2082 /* Returns true if incrementing the induction variable at the end of the LOOP
2083    is allowed.
2084
2085    The purpose is to avoid splitting latch edge with a biv increment, thus
2086    creating a jump, possibly confusing other optimization passes and leaving
2087    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2088    is not available (so we do not have a better alternative), or if the latch
2089    edge is already nonempty.  */
2090
2091 static bool
2092 allow_ip_end_pos_p (struct loop *loop)
2093 {
2094   if (!ip_normal_pos (loop))
2095     return true;
2096
2097   if (!empty_block_p (ip_end_pos (loop)))
2098     return true;
2099
2100   return false;
2101 }
2102
2103 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2104    position to POS.  If USE is not NULL, the candidate is set as related to
2105    it.  The candidate computation is scheduled on all available positions.  */
2106
2107 static void
2108 add_candidate (struct ivopts_data *data,
2109                tree base, tree step, bool important, struct iv_use *use)
2110 {
2111   if (ip_normal_pos (data->current_loop))
2112     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2113   if (ip_end_pos (data->current_loop)
2114       && allow_ip_end_pos_p (data->current_loop))
2115     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2116 }
2117
2118 /* Add a standard "0 + 1 * iteration" iv candidate for a
2119    type with SIZE bits.  */
2120
2121 static void
2122 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2123                                      unsigned int size)
2124 {
2125   tree type = lang_hooks.types.type_for_size (size, true);
2126   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2127                  true, NULL);
2128 }
2129
2130 /* Adds standard iv candidates.  */
2131
2132 static void
2133 add_standard_iv_candidates (struct ivopts_data *data)
2134 {
2135   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2136
2137   /* The same for a double-integer type if it is still fast enough.  */
2138   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2139     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2140 }
2141
2142
2143 /* Adds candidates bases on the old induction variable IV.  */
2144
2145 static void
2146 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2147 {
2148   tree phi, def;
2149   struct iv_cand *cand;
2150
2151   add_candidate (data, iv->base, iv->step, true, NULL);
2152
2153   /* The same, but with initial value zero.  */
2154   add_candidate (data,
2155                  build_int_cst (TREE_TYPE (iv->base), 0),
2156                  iv->step, true, NULL);
2157
2158   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2159   if (TREE_CODE (phi) == PHI_NODE)
2160     {
2161       /* Additionally record the possibility of leaving the original iv
2162          untouched.  */
2163       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2164       cand = add_candidate_1 (data,
2165                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2166                               SSA_NAME_DEF_STMT (def));
2167       cand->var_before = iv->ssa_name;
2168       cand->var_after = def;
2169     }
2170 }
2171
2172 /* Adds candidates based on the old induction variables.  */
2173
2174 static void
2175 add_old_ivs_candidates (struct ivopts_data *data)
2176 {
2177   unsigned i;
2178   struct iv *iv;
2179   bitmap_iterator bi;
2180
2181   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2182     {
2183       iv = ver_info (data, i)->iv;
2184       if (iv && iv->biv_p && !zero_p (iv->step))
2185         add_old_iv_candidates (data, iv);
2186     }
2187 }
2188
2189 /* Adds candidates based on the value of the induction variable IV and USE.  */
2190
2191 static void
2192 add_iv_value_candidates (struct ivopts_data *data,
2193                          struct iv *iv, struct iv_use *use)
2194 {
2195   unsigned HOST_WIDE_INT offset;
2196   tree base;
2197
2198   add_candidate (data, iv->base, iv->step, false, use);
2199
2200   /* The same, but with initial value zero.  Make such variable important,
2201      since it is generic enough so that possibly many uses may be based
2202      on it.  */
2203   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2204                  iv->step, true, use);
2205
2206   /* Third, try removing the constant offset.  */
2207   base = strip_offset (iv->base, &offset);
2208   if (offset)
2209     add_candidate (data, base, iv->step, false, use);
2210 }
2211
2212 /* Possibly adds pseudocandidate for replacing the final value of USE by
2213    a direct computation.  */
2214
2215 static void
2216 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2217 {
2218   struct tree_niter_desc *niter;
2219
2220   /* We must know where we exit the loop and how many times does it roll.  */
2221   niter = niter_for_single_dom_exit (data);
2222   if (!niter
2223       || !zero_p (niter->may_be_zero))
2224     return;
2225
2226   add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2227 }
2228
2229 /* Adds candidates based on the uses.  */
2230
2231 static void
2232 add_derived_ivs_candidates (struct ivopts_data *data)
2233 {
2234   unsigned i;
2235
2236   for (i = 0; i < n_iv_uses (data); i++)
2237     {
2238       struct iv_use *use = iv_use (data, i);
2239
2240       if (!use)
2241         continue;
2242
2243       switch (use->type)
2244         {
2245         case USE_NONLINEAR_EXPR:
2246         case USE_COMPARE:
2247         case USE_ADDRESS:
2248           /* Just add the ivs based on the value of the iv used here.  */
2249           add_iv_value_candidates (data, use->iv, use);
2250           break;
2251
2252         case USE_OUTER:
2253           add_iv_value_candidates (data, use->iv, use);
2254
2255           /* Additionally, add the pseudocandidate for the possibility to
2256              replace the final value by a direct computation.  */
2257           add_iv_outer_candidates (data, use);
2258           break;
2259
2260         default:
2261           gcc_unreachable ();
2262         }
2263     }
2264 }
2265
2266 /* Record important candidates and add them to related_cands bitmaps
2267    if needed.  */
2268
2269 static void
2270 record_important_candidates (struct ivopts_data *data)
2271 {
2272   unsigned i;
2273   struct iv_use *use;
2274
2275   for (i = 0; i < n_iv_cands (data); i++)
2276     {
2277       struct iv_cand *cand = iv_cand (data, i);
2278
2279       if (cand->important)
2280         bitmap_set_bit (data->important_candidates, i);
2281     }
2282
2283   data->consider_all_candidates = (n_iv_cands (data)
2284                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2285
2286   if (data->consider_all_candidates)
2287     {
2288       /* We will not need "related_cands" bitmaps in this case,
2289          so release them to decrease peak memory consumption.  */
2290       for (i = 0; i < n_iv_uses (data); i++)
2291         {
2292           use = iv_use (data, i);
2293           BITMAP_FREE (use->related_cands);
2294         }
2295     }
2296   else
2297     {
2298       /* Add important candidates to the related_cands bitmaps.  */
2299       for (i = 0; i < n_iv_uses (data); i++)
2300         bitmap_ior_into (iv_use (data, i)->related_cands,
2301                          data->important_candidates);
2302     }
2303 }
2304
2305 /* Finds the candidates for the induction variables.  */
2306
2307 static void
2308 find_iv_candidates (struct ivopts_data *data)
2309 {
2310   /* Add commonly used ivs.  */
2311   add_standard_iv_candidates (data);
2312
2313   /* Add old induction variables.  */
2314   add_old_ivs_candidates (data);
2315
2316   /* Add induction variables derived from uses.  */
2317   add_derived_ivs_candidates (data);
2318
2319   /* Record the important candidates.  */
2320   record_important_candidates (data);
2321 }
2322
2323 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2324    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2325    we allocate a simple list to every use.  */
2326
2327 static void
2328 alloc_use_cost_map (struct ivopts_data *data)
2329 {
2330   unsigned i, size, s, j;
2331
2332   for (i = 0; i < n_iv_uses (data); i++)
2333     {
2334       struct iv_use *use = iv_use (data, i);
2335       bitmap_iterator bi;
2336
2337       if (data->consider_all_candidates)
2338         size = n_iv_cands (data);
2339       else
2340         {
2341           s = 0;
2342           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2343             {
2344               s++;
2345             }
2346
2347           /* Round up to the power of two, so that moduling by it is fast.  */
2348           for (size = 1; size < s; size <<= 1)
2349             continue;
2350         }
2351
2352       use->n_map_members = size;
2353       use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2354     }
2355 }
2356
2357 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2358    on invariants DEPENDS_ON and that the value used in expressing it
2359    is VALUE.*/
2360
2361 static void
2362 set_use_iv_cost (struct ivopts_data *data,
2363                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2364                  bitmap depends_on, tree value)
2365 {
2366   unsigned i, s;
2367
2368   if (cost == INFTY)
2369     {
2370       BITMAP_FREE (depends_on);
2371       return;
2372     }
2373
2374   if (data->consider_all_candidates)
2375     {
2376       use->cost_map[cand->id].cand = cand;
2377       use->cost_map[cand->id].cost = cost;
2378       use->cost_map[cand->id].depends_on = depends_on;
2379       use->cost_map[cand->id].value = value;
2380       return;
2381     }
2382
2383   /* n_map_members is a power of two, so this computes modulo.  */
2384   s = cand->id & (use->n_map_members - 1);
2385   for (i = s; i < use->n_map_members; i++)
2386     if (!use->cost_map[i].cand)
2387       goto found;
2388   for (i = 0; i < s; i++)
2389     if (!use->cost_map[i].cand)
2390       goto found;
2391
2392   gcc_unreachable ();
2393
2394 found:
2395   use->cost_map[i].cand = cand;
2396   use->cost_map[i].cost = cost;
2397   use->cost_map[i].depends_on = depends_on;
2398   use->cost_map[i].value = value;
2399 }
2400
2401 /* Gets cost of (USE, CANDIDATE) pair.  */
2402
2403 static struct cost_pair *
2404 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2405                  struct iv_cand *cand)
2406 {
2407   unsigned i, s;
2408   struct cost_pair *ret;
2409
2410   if (!cand)
2411     return NULL;
2412
2413   if (data->consider_all_candidates)
2414     {
2415       ret = use->cost_map + cand->id;
2416       if (!ret->cand)
2417         return NULL;
2418
2419       return ret;
2420     }
2421
2422   /* n_map_members is a power of two, so this computes modulo.  */
2423   s = cand->id & (use->n_map_members - 1);
2424   for (i = s; i < use->n_map_members; i++)
2425     if (use->cost_map[i].cand == cand)
2426       return use->cost_map + i;
2427
2428   for (i = 0; i < s; i++)
2429     if (use->cost_map[i].cand == cand)
2430       return use->cost_map + i;
2431
2432   return NULL;
2433 }
2434
2435 /* Returns estimate on cost of computing SEQ.  */
2436
2437 static unsigned
2438 seq_cost (rtx seq)
2439 {
2440   unsigned cost = 0;
2441   rtx set;
2442
2443   for (; seq; seq = NEXT_INSN (seq))
2444     {
2445       set = single_set (seq);
2446       if (set)
2447         cost += rtx_cost (set, SET);
2448       else
2449         cost++;
2450     }
2451
2452   return cost;
2453 }
2454
2455 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2456 static rtx
2457 produce_memory_decl_rtl (tree obj, int *regno)
2458 {
2459   rtx x;
2460
2461   gcc_assert (obj);
2462   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2463     {
2464       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2465       x = gen_rtx_SYMBOL_REF (Pmode, name);
2466     }
2467   else
2468     x = gen_raw_REG (Pmode, (*regno)++);
2469
2470   return gen_rtx_MEM (DECL_MODE (obj), x);
2471 }
2472
2473 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2474    walk_tree.  DATA contains the actual fake register number.  */
2475
2476 static tree
2477 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2478 {
2479   tree obj = NULL_TREE;
2480   rtx x = NULL_RTX;
2481   int *regno = data;
2482
2483   switch (TREE_CODE (*expr_p))
2484     {
2485     case ADDR_EXPR:
2486       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2487            handled_component_p (*expr_p);
2488            expr_p = &TREE_OPERAND (*expr_p, 0))
2489         continue;
2490       obj = *expr_p;
2491       if (DECL_P (obj))
2492         x = produce_memory_decl_rtl (obj, regno);
2493       break;
2494
2495     case SSA_NAME:
2496       *ws = 0;
2497       obj = SSA_NAME_VAR (*expr_p);
2498       if (!DECL_RTL_SET_P (obj))
2499         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2500       break;
2501
2502     case VAR_DECL:
2503     case PARM_DECL:
2504     case RESULT_DECL:
2505       *ws = 0;
2506       obj = *expr_p;
2507
2508       if (DECL_RTL_SET_P (obj))
2509         break;
2510
2511       if (DECL_MODE (obj) == BLKmode)
2512         x = produce_memory_decl_rtl (obj, regno);
2513       else
2514         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2515
2516       break;
2517
2518     default:
2519       break;
2520     }
2521
2522   if (x)
2523     {
2524       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2525       SET_DECL_RTL (obj, x);
2526     }
2527
2528   return NULL_TREE;
2529 }
2530
2531 /* Determines cost of the computation of EXPR.  */
2532
2533 static unsigned
2534 computation_cost (tree expr)
2535 {
2536   rtx seq, rslt;
2537   tree type = TREE_TYPE (expr);
2538   unsigned cost;
2539   /* Avoid using hard regs in ways which may be unsupported.  */
2540   int regno = LAST_VIRTUAL_REGISTER + 1;
2541
2542   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2543   start_sequence ();
2544   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2545   seq = get_insns ();
2546   end_sequence ();
2547
2548   cost = seq_cost (seq);
2549   if (MEM_P (rslt))
2550     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2551
2552   return cost;
2553 }
2554
2555 /* Returns variable containing the value of candidate CAND at statement AT.  */
2556
2557 static tree
2558 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2559 {
2560   if (stmt_after_increment (loop, cand, stmt))
2561     return cand->var_after;
2562   else
2563     return cand->var_before;
2564 }
2565
2566 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2567    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2568
2569 int
2570 tree_int_cst_sign_bit (tree t)
2571 {
2572   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2573   unsigned HOST_WIDE_INT w;
2574
2575   if (bitno < HOST_BITS_PER_WIDE_INT)
2576     w = TREE_INT_CST_LOW (t);
2577   else
2578     {
2579       w = TREE_INT_CST_HIGH (t);
2580       bitno -= HOST_BITS_PER_WIDE_INT;
2581     }
2582
2583   return (w >> bitno) & 1;
2584 }
2585
2586 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2587    return cst.  Otherwise return NULL_TREE.  */
2588
2589 static tree
2590 constant_multiple_of (tree type, tree top, tree bot)
2591 {
2592   tree res, mby, p0, p1;
2593   enum tree_code code;
2594   bool negate;
2595
2596   STRIP_NOPS (top);
2597   STRIP_NOPS (bot);
2598
2599   if (operand_equal_p (top, bot, 0))
2600     return build_int_cst (type, 1);
2601
2602   code = TREE_CODE (top);
2603   switch (code)
2604     {
2605     case MULT_EXPR:
2606       mby = TREE_OPERAND (top, 1);
2607       if (TREE_CODE (mby) != INTEGER_CST)
2608         return NULL_TREE;
2609
2610       res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2611       if (!res)
2612         return NULL_TREE;
2613
2614       return fold_binary_to_constant (MULT_EXPR, type, res,
2615                                       fold_convert (type, mby));
2616
2617     case PLUS_EXPR:
2618     case MINUS_EXPR:
2619       p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2620       if (!p0)
2621         return NULL_TREE;
2622       p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2623       if (!p1)
2624         return NULL_TREE;
2625
2626       return fold_binary_to_constant (code, type, p0, p1);
2627
2628     case INTEGER_CST:
2629       if (TREE_CODE (bot) != INTEGER_CST)
2630         return NULL_TREE;
2631
2632       bot = fold_convert (type, bot);
2633       top = fold_convert (type, top);
2634
2635       /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2636          the result afterwards.  */
2637       if (tree_int_cst_sign_bit (bot))
2638         {
2639           negate = true;
2640           bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2641         }
2642       else
2643         negate = false;
2644
2645       /* Ditto for TOP.  */
2646       if (tree_int_cst_sign_bit (top))
2647         {
2648           negate = !negate;
2649           top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2650         }
2651
2652       if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2653         return NULL_TREE;
2654
2655       res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2656       if (negate)
2657         res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2658       return res;
2659
2660     default:
2661       return NULL_TREE;
2662     }
2663 }
2664
2665 /* Sets COMB to CST.  */
2666
2667 static void
2668 aff_combination_const (struct affine_tree_combination *comb, tree type,
2669                        unsigned HOST_WIDE_INT cst)
2670 {
2671   unsigned prec = TYPE_PRECISION (type);
2672
2673   comb->type = type;
2674   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2675
2676   comb->n = 0;
2677   comb->rest = NULL_TREE;
2678   comb->offset = cst & comb->mask;
2679 }
2680
2681 /* Sets COMB to single element ELT.  */
2682
2683 static void
2684 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2685 {
2686   unsigned prec = TYPE_PRECISION (type);
2687
2688   comb->type = type;
2689   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2690
2691   comb->n = 1;
2692   comb->elts[0] = elt;
2693   comb->coefs[0] = 1;
2694   comb->rest = NULL_TREE;
2695   comb->offset = 0;
2696 }
2697
2698 /* Scales COMB by SCALE.  */
2699
2700 static void
2701 aff_combination_scale (struct affine_tree_combination *comb,
2702                        unsigned HOST_WIDE_INT scale)
2703 {
2704   unsigned i, j;
2705
2706   if (scale == 1)
2707     return;
2708
2709   if (scale == 0)
2710     {
2711       aff_combination_const (comb, comb->type, 0);
2712       return;
2713     }
2714
2715   comb->offset = (scale * comb->offset) & comb->mask;
2716   for (i = 0, j = 0; i < comb->n; i++)
2717     {
2718       comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2719       comb->elts[j] = comb->elts[i];
2720       if (comb->coefs[j] != 0)
2721         j++;
2722     }
2723   comb->n = j;
2724
2725   if (comb->rest)
2726     {
2727       if (comb->n < MAX_AFF_ELTS)
2728         {
2729           comb->coefs[comb->n] = scale;
2730           comb->elts[comb->n] = comb->rest;
2731           comb->rest = NULL_TREE;
2732           comb->n++;
2733         }
2734       else
2735         comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2736                                   build_int_cst_type (comb->type, scale));
2737     }
2738 }
2739
2740 /* Adds ELT * SCALE to COMB.  */
2741
2742 static void
2743 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2744                          unsigned HOST_WIDE_INT scale)
2745 {
2746   unsigned i;
2747
2748   if (scale == 0)
2749     return;
2750
2751   for (i = 0; i < comb->n; i++)
2752     if (operand_equal_p (comb->elts[i], elt, 0))
2753       {
2754         comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2755         if (comb->coefs[i])
2756           return;
2757
2758         comb->n--;
2759         comb->coefs[i] = comb->coefs[comb->n];
2760         comb->elts[i] = comb->elts[comb->n];
2761         return;
2762       }
2763   if (comb->n < MAX_AFF_ELTS)
2764     {
2765       comb->coefs[comb->n] = scale;
2766       comb->elts[comb->n] = elt;
2767       comb->n++;
2768       return;
2769     }
2770
2771   if (scale == 1)
2772     elt = fold_convert (comb->type, elt);
2773   else
2774     elt = fold_build2 (MULT_EXPR, comb->type,
2775                        fold_convert (comb->type, elt),
2776                        build_int_cst_type (comb->type, scale));
2777
2778   if (comb->rest)
2779     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2780   else
2781     comb->rest = elt;
2782 }
2783
2784 /* Adds COMB2 to COMB1.  */
2785
2786 static void
2787 aff_combination_add (struct affine_tree_combination *comb1,
2788                      struct affine_tree_combination *comb2)
2789 {
2790   unsigned i;
2791
2792   comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2793   for (i = 0; i < comb2-> n; i++)
2794     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2795   if (comb2->rest)
2796     aff_combination_add_elt (comb1, comb2->rest, 1);
2797 }
2798
2799 /* Splits EXPR into an affine combination of parts.  */
2800
2801 static void
2802 tree_to_aff_combination (tree expr, tree type,
2803                          struct affine_tree_combination *comb)
2804 {
2805   struct affine_tree_combination tmp;
2806   enum tree_code code;
2807   tree cst, core, toffset;
2808   HOST_WIDE_INT bitpos, bitsize;
2809   enum machine_mode mode;
2810   int unsignedp, volatilep;
2811
2812   STRIP_NOPS (expr);
2813
2814   code = TREE_CODE (expr);
2815   switch (code)
2816     {
2817     case INTEGER_CST:
2818       aff_combination_const (comb, type, int_cst_value (expr));
2819       return;
2820
2821     case PLUS_EXPR:
2822     case MINUS_EXPR:
2823       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2824       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2825       if (code == MINUS_EXPR)
2826         aff_combination_scale (&tmp, -1);
2827       aff_combination_add (comb, &tmp);
2828       return;
2829
2830     case MULT_EXPR:
2831       cst = TREE_OPERAND (expr, 1);
2832       if (TREE_CODE (cst) != INTEGER_CST)
2833         break;
2834       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2835       aff_combination_scale (comb, int_cst_value (cst));
2836       return;
2837
2838     case NEGATE_EXPR:
2839       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2840       aff_combination_scale (comb, -1);
2841       return;
2842
2843     case ADDR_EXPR:
2844       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2845                                   &toffset, &mode, &unsignedp, &volatilep,
2846                                   false);
2847       if (bitpos % BITS_PER_UNIT != 0)
2848         break;
2849       aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2850       core = build_fold_addr_expr (core);
2851       if (TREE_CODE (core) == ADDR_EXPR)
2852         aff_combination_add_elt (comb, core, 1);
2853       else
2854         {
2855           tree_to_aff_combination (core, type, &tmp);
2856           aff_combination_add (comb, &tmp);
2857         }
2858       if (toffset)
2859         {
2860           tree_to_aff_combination (toffset, type, &tmp);
2861           aff_combination_add (comb, &tmp);
2862         }
2863       return;
2864
2865     default:
2866       break;
2867     }
2868
2869   aff_combination_elt (comb, type, expr);
2870 }
2871
2872 /* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
2873
2874 static tree
2875 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2876                  unsigned HOST_WIDE_INT mask)
2877 {
2878   enum tree_code code;
2879
2880   scale &= mask;
2881   elt = fold_convert (type, elt);
2882
2883   if (scale == 1)
2884     {
2885       if (!expr)
2886         return elt;
2887
2888       return fold_build2 (PLUS_EXPR, type, expr, elt);
2889     }
2890
2891   if (scale == mask)
2892     {
2893       if (!expr)
2894         return fold_build1 (NEGATE_EXPR, type, elt);
2895
2896       return fold_build2 (MINUS_EXPR, type, expr, elt);
2897     }
2898
2899   if (!expr)
2900     return fold_build2 (MULT_EXPR, type, elt,
2901                         build_int_cst_type (type, scale));
2902
2903   if ((scale | (mask >> 1)) == mask)
2904     {
2905       /* Scale is negative.  */
2906       code = MINUS_EXPR;
2907       scale = (-scale) & mask;
2908     }
2909   else
2910     code = PLUS_EXPR;
2911
2912   elt = fold_build2 (MULT_EXPR, type, elt,
2913                      build_int_cst_type (type, scale));
2914   return fold_build2 (code, type, expr, elt);
2915 }
2916
2917 /* Copies the tree elements of COMB to ensure that they are not shared.  */
2918
2919 static void
2920 unshare_aff_combination (struct affine_tree_combination *comb)
2921 {
2922   unsigned i;
2923
2924   for (i = 0; i < comb->n; i++)
2925     comb->elts[i] = unshare_expr (comb->elts[i]);
2926   if (comb->rest)
2927     comb->rest = unshare_expr (comb->rest);
2928 }
2929
2930 /* Makes tree from the affine combination COMB.  */
2931
2932 static tree
2933 aff_combination_to_tree (struct affine_tree_combination *comb)
2934 {
2935   tree type = comb->type;
2936   tree expr = comb->rest;
2937   unsigned i;
2938   unsigned HOST_WIDE_INT off, sgn;
2939
2940   /* Handle the special case produced by get_computation_aff when
2941      the type does not fit in HOST_WIDE_INT.  */
2942   if (comb->n == 0 && comb->offset == 0)
2943     return fold_convert (type, expr);
2944
2945   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2946
2947   for (i = 0; i < comb->n; i++)
2948     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2949                             comb->mask);
2950
2951   if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2952     {
2953       /* Offset is negative.  */
2954       off = (-comb->offset) & comb->mask;
2955       sgn = comb->mask;
2956     }
2957   else
2958     {
2959       off = comb->offset;
2960       sgn = 1;
2961     }
2962   return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2963                           comb->mask);
2964 }
2965
2966 /* Determines the expression by that USE is expressed from induction variable
2967    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2968    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2969
2970 static bool
2971 get_computation_aff (struct loop *loop,
2972                      struct iv_use *use, struct iv_cand *cand, tree at,
2973                      struct affine_tree_combination *aff)
2974 {
2975   tree ubase = use->iv->base;
2976   tree ustep = use->iv->step;
2977   tree cbase = cand->iv->base;
2978   tree cstep = cand->iv->step;
2979   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2980   tree uutype;
2981   tree expr, delta;
2982   tree ratio;
2983   unsigned HOST_WIDE_INT ustepi, cstepi;
2984   HOST_WIDE_INT ratioi;
2985   struct affine_tree_combination cbase_aff, expr_aff;
2986   tree cstep_orig = cstep, ustep_orig = ustep;
2987
2988   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2989     {
2990       /* We do not have a precision to express the values of use.  */
2991       return false;
2992     }
2993
2994   expr = var_at_stmt (loop, cand, at);
2995
2996   if (TREE_TYPE (expr) != ctype)
2997     {
2998       /* This may happen with the original ivs.  */
2999       expr = fold_convert (ctype, expr);
3000     }
3001
3002   if (TYPE_UNSIGNED (utype))
3003     uutype = utype;
3004   else
3005     {
3006       uutype = unsigned_type_for (utype);
3007       ubase = fold_convert (uutype, ubase);
3008       ustep = fold_convert (uutype, ustep);
3009     }
3010
3011   if (uutype != ctype)
3012     {
3013       expr = fold_convert (uutype, expr);
3014       cbase = fold_convert (uutype, cbase);
3015       cstep = fold_convert (uutype, cstep);
3016
3017       /* If the conversion is not noop, we must take it into account when
3018          considering the value of the step.  */
3019       if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3020         cstep_orig = cstep;
3021     }
3022
3023   if (cst_and_fits_in_hwi (cstep_orig)
3024       && cst_and_fits_in_hwi (ustep_orig))
3025     {
3026       ustepi = int_cst_value (ustep_orig);
3027       cstepi = int_cst_value (cstep_orig);
3028
3029       if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3030         {
3031           /* TODO maybe consider case when ustep divides cstep and the ratio is
3032              a power of 2 (so that the division is fast to execute)?  We would
3033              need to be much more careful with overflows etc. then.  */
3034           return false;
3035         }
3036
3037       ratio = build_int_cst_type (uutype, ratioi);
3038     }
3039   else
3040     {
3041       ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
3042       if (!ratio)
3043         return false;
3044
3045       /* Ratioi is only used to detect special cases when the multiplicative
3046          factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3047          we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
3048          to integer_onep/integer_all_onesp, since the former ignores
3049          TREE_OVERFLOW.  */
3050       if (cst_and_fits_in_hwi (ratio))
3051         ratioi = int_cst_value (ratio);
3052       else if (integer_onep (ratio))
3053         ratioi = 1;
3054       else if (integer_all_onesp (ratio))
3055         ratioi = -1;
3056       else
3057         ratioi = 0;
3058     }
3059
3060   /* We may need to shift the value if we are after the increment.  */
3061   if (stmt_after_increment (loop, cand, at))
3062     cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3063
3064   /* use = ubase - ratio * cbase + ratio * var.
3065
3066      In general case ubase + ratio * (var - cbase) could be better (one less
3067      multiplication), but often it is possible to eliminate redundant parts
3068      of computations from (ubase - ratio * cbase) term, and if it does not
3069      happen, fold is able to apply the distributive law to obtain this form
3070      anyway.  */
3071
3072   if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3073     {
3074       /* Let's compute in trees and just return the result in AFF.  This case
3075          should not be very common, and fold itself is not that bad either,
3076          so making the aff. functions more complicated to handle this case
3077          is not that urgent.  */
3078       if (ratioi == 1)
3079         {
3080           delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3081           expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3082         }
3083       else if (ratioi == -1)
3084         {
3085           delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3086           expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3087         }
3088       else
3089         {
3090           delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3091           delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3092           expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3093           expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3094         }
3095
3096       aff->type = uutype;
3097       aff->n = 0;
3098       aff->offset = 0;
3099       aff->mask = 0;
3100       aff->rest = expr;
3101       return true;
3102     }
3103
3104   /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3105      possible to compute ratioi.  */
3106   gcc_assert (ratioi);
3107
3108   tree_to_aff_combination (ubase, uutype, aff);
3109   tree_to_aff_combination (cbase, uutype, &cbase_aff);
3110   tree_to_aff_combination (expr, uutype, &expr_aff);
3111   aff_combination_scale (&cbase_aff, -ratioi);
3112   aff_combination_scale (&expr_aff, ratioi);
3113   aff_combination_add (aff, &cbase_aff);
3114   aff_combination_add (aff, &expr_aff);
3115
3116   return true;
3117 }
3118
3119 /* Determines the expression by that USE is expressed from induction variable
3120    CAND at statement AT in LOOP.  The computation is unshared.  */
3121
3122 static tree
3123 get_computation_at (struct loop *loop,
3124                     struct iv_use *use, struct iv_cand *cand, tree at)
3125 {
3126   struct affine_tree_combination aff;
3127   tree type = TREE_TYPE (use->iv->base);
3128
3129   if (!get_computation_aff (loop, use, cand, at, &aff))
3130     return NULL_TREE;
3131   unshare_aff_combination (&aff);
3132   return fold_convert (type, aff_combination_to_tree (&aff));
3133 }
3134
3135 /* Determines the expression by that USE is expressed from induction variable
3136    CAND in LOOP.  The computation is unshared.  */
3137
3138 static tree
3139 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3140 {
3141   return get_computation_at (loop, use, cand, use->stmt);
3142 }
3143
3144 /* Returns cost of addition in MODE.  */
3145
3146 static unsigned
3147 add_cost (enum machine_mode mode)
3148 {
3149   static unsigned costs[NUM_MACHINE_MODES];
3150   rtx seq;
3151   unsigned cost;
3152
3153   if (costs[mode])
3154     return costs[mode];
3155
3156   start_sequence ();
3157   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3158                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3159                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3160                  NULL_RTX);
3161   seq = get_insns ();
3162   end_sequence ();
3163
3164   cost = seq_cost (seq);
3165   if (!cost)
3166     cost = 1;
3167
3168   costs[mode] = cost;
3169
3170   if (dump_file && (dump_flags & TDF_DETAILS))
3171     fprintf (dump_file, "Addition in %s costs %d\n",
3172              GET_MODE_NAME (mode), cost);
3173   return cost;
3174 }
3175
3176 /* Entry in a hashtable of already known costs for multiplication.  */
3177 struct mbc_entry
3178 {
3179   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3180   enum machine_mode mode;       /* In mode.  */
3181   unsigned cost;                /* The cost.  */
3182 };
3183
3184 /* Counts hash value for the ENTRY.  */
3185
3186 static hashval_t
3187 mbc_entry_hash (const void *entry)
3188 {
3189   const struct mbc_entry *e = entry;
3190
3191   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3192 }
3193
3194 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3195
3196 static int
3197 mbc_entry_eq (const void *entry1, const void *entry2)
3198 {
3199   const struct mbc_entry *e1 = entry1;
3200   const struct mbc_entry *e2 = entry2;
3201
3202   return (e1->mode == e2->mode
3203           && e1->cst == e2->cst);
3204 }
3205
3206 /* Returns cost of multiplication by constant CST in MODE.  */
3207
3208 unsigned
3209 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3210 {
3211   static htab_t costs;
3212   struct mbc_entry **cached, act;
3213   rtx seq;
3214   unsigned cost;
3215
3216   if (!costs)
3217     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3218
3219   act.mode = mode;
3220   act.cst = cst;
3221   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3222   if (*cached)
3223     return (*cached)->cost;
3224
3225   *cached = xmalloc (sizeof (struct mbc_entry));
3226   (*cached)->mode = mode;
3227   (*cached)->cst = cst;
3228
3229   start_sequence ();
3230   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3231                gen_int_mode (cst, mode), NULL_RTX, 0);
3232   seq = get_insns ();
3233   end_sequence ();
3234
3235   cost = seq_cost (seq);
3236
3237   if (dump_file && (dump_flags & TDF_DETAILS))
3238     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3239              (int) cst, GET_MODE_NAME (mode), cost);
3240
3241   (*cached)->cost = cost;
3242
3243   return cost;
3244 }
3245
3246 /* Returns true if multiplying by RATIO is allowed in address.  */
3247
3248 bool
3249 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3250 {
3251 #define MAX_RATIO 128
3252   static sbitmap valid_mult;
3253
3254   if (!valid_mult)
3255     {
3256       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3257       rtx addr;
3258       HOST_WIDE_INT i;
3259
3260       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3261       sbitmap_zero (valid_mult);
3262       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3263       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3264         {
3265           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3266           if (memory_address_p (Pmode, addr))
3267             SET_BIT (valid_mult, i + MAX_RATIO);
3268         }
3269
3270       if (dump_file && (dump_flags & TDF_DETAILS))
3271         {
3272           fprintf (dump_file, "  allowed multipliers:");
3273           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3274             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3275               fprintf (dump_file, " %d", (int) i);
3276           fprintf (dump_file, "\n");
3277           fprintf (dump_file, "\n");
3278         }
3279     }
3280
3281   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3282     return false;
3283
3284   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3285 }
3286
3287 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3288    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3289    variable is omitted.  The created memory accesses MODE.
3290
3291    TODO -- there must be some better way.  This all is quite crude.  */
3292
3293 static unsigned
3294 get_address_cost (bool symbol_present, bool var_present,
3295                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3296 {
3297   static bool initialized = false;
3298   static HOST_WIDE_INT rat, off;
3299   static HOST_WIDE_INT min_offset, max_offset;
3300   static unsigned costs[2][2][2][2];
3301   unsigned cost, acost;
3302   rtx seq, addr, base;
3303   bool offset_p, ratio_p;
3304   rtx reg1;
3305   HOST_WIDE_INT s_offset;
3306   unsigned HOST_WIDE_INT mask;
3307   unsigned bits;
3308
3309   if (!initialized)
3310     {
3311       HOST_WIDE_INT i;
3312       initialized = true;
3313
3314       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3315
3316       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3317       for (i = 1; i <= 1 << 20; i <<= 1)
3318         {
3319           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3320           if (!memory_address_p (Pmode, addr))
3321             break;
3322         }
3323       max_offset = i >> 1;
3324       off = max_offset;
3325
3326       for (i = 1; i <= 1 << 20; i <<= 1)
3327         {
3328           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3329           if (!memory_address_p (Pmode, addr))
3330             break;
3331         }
3332       min_offset = -(i >> 1);
3333
3334       if (dump_file && (dump_flags & TDF_DETAILS))
3335         {
3336           fprintf (dump_file, "get_address_cost:\n");
3337           fprintf (dump_file, "  min offset %d\n", (int) min_offset);
3338           fprintf (dump_file, "  max offset %d\n", (int) max_offset);
3339         }
3340
3341       rat = 1;
3342       for (i = 2; i <= MAX_RATIO; i++)
3343         if (multiplier_allowed_in_address_p (i))
3344           {
3345             rat = i;
3346             break;
3347           }
3348     }
3349
3350   bits = GET_MODE_BITSIZE (Pmode);
3351   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3352   offset &= mask;
3353   if ((offset >> (bits - 1) & 1))
3354     offset |= ~mask;
3355   s_offset = offset;
3356
3357   cost = 0;
3358   offset_p = (s_offset != 0
3359               && min_offset <= s_offset && s_offset <= max_offset);
3360   ratio_p = (ratio != 1
3361              && multiplier_allowed_in_address_p (ratio));
3362
3363   if (ratio != 1 && !ratio_p)
3364     cost += multiply_by_cost (ratio, Pmode);
3365
3366   if (s_offset && !offset_p && !symbol_present)
3367     {
3368       cost += add_cost (Pmode);
3369       var_present = true;
3370     }
3371
3372   acost = costs[symbol_present][var_present][offset_p][ratio_p];
3373   if (!acost)
3374     {
3375       acost = 0;
3376
3377       addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3378       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3379       if (ratio_p)
3380         addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3381
3382       if (var_present)
3383         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3384
3385       if (symbol_present)
3386         {
3387           base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3388           if (offset_p)
3389             base = gen_rtx_fmt_e (CONST, Pmode,
3390                                   gen_rtx_fmt_ee (PLUS, Pmode,
3391                                                   base,
3392                                                   gen_int_mode (off, Pmode)));
3393         }
3394       else if (offset_p)
3395         base = gen_int_mode (off, Pmode);
3396       else
3397         base = NULL_RTX;
3398
3399       if (base)
3400         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3401
3402       start_sequence ();
3403       addr = memory_address (Pmode, addr);
3404       seq = get_insns ();
3405       end_sequence ();
3406
3407       acost = seq_cost (seq);
3408       acost += address_cost (addr, Pmode);
3409
3410       if (!acost)
3411         acost = 1;
3412       costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3413     }
3414
3415   return cost + acost;
3416 }
3417 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3418    invariants the computation depends on.  */
3419
3420 static unsigned
3421 force_var_cost (struct ivopts_data *data,
3422                 tree expr, bitmap *depends_on)
3423 {
3424   static bool costs_initialized = false;
3425   static unsigned integer_cost;
3426   static unsigned symbol_cost;
3427   static unsigned address_cost;
3428   tree op0, op1;
3429   unsigned cost0, cost1, cost;
3430   enum machine_mode mode;
3431
3432   if (!costs_initialized)
3433     {
3434       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3435       rtx x = gen_rtx_MEM (DECL_MODE (var),
3436                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3437       tree addr;
3438       tree type = build_pointer_type (integer_type_node);
3439
3440       integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3441                                                            2000));
3442
3443       SET_DECL_RTL (var, x);
3444       TREE_STATIC (var) = 1;
3445       addr = build1 (ADDR_EXPR, type, var);
3446       symbol_cost = computation_cost (addr) + 1;
3447
3448       address_cost
3449         = computation_cost (build2 (PLUS_EXPR, type,
3450                                     addr,
3451                                     build_int_cst_type (type, 2000))) + 1;
3452       if (dump_file && (dump_flags & TDF_DETAILS))
3453         {
3454           fprintf (dump_file, "force_var_cost:\n");
3455           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3456           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3457           fprintf (dump_file, "  address %d\n", (int) address_cost);
3458           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3459           fprintf (dump_file, "\n");
3460         }
3461
3462       costs_initialized = true;
3463     }
3464
3465   STRIP_NOPS (expr);
3466
3467   if (depends_on)
3468     {
3469       fd_ivopts_data = data;
3470       walk_tree (&expr, find_depends, depends_on, NULL);
3471     }
3472
3473   if (SSA_VAR_P (expr))
3474     return 0;
3475
3476   if (TREE_INVARIANT (expr))
3477     {
3478       if (TREE_CODE (expr) == INTEGER_CST)
3479         return integer_cost;
3480
3481       if (TREE_CODE (expr) == ADDR_EXPR)
3482         {
3483           tree obj = TREE_OPERAND (expr, 0);
3484
3485           if (TREE_CODE (obj) == VAR_DECL
3486               || TREE_CODE (obj) == PARM_DECL
3487               || TREE_CODE (obj) == RESULT_DECL)
3488             return symbol_cost;
3489         }
3490
3491       return address_cost;
3492     }
3493
3494   switch (TREE_CODE (expr))
3495     {
3496     case PLUS_EXPR:
3497     case MINUS_EXPR:
3498     case MULT_EXPR:
3499       op0 = TREE_OPERAND (expr, 0);
3500       op1 = TREE_OPERAND (expr, 1);
3501       STRIP_NOPS (op0);
3502       STRIP_NOPS (op1);
3503
3504       if (is_gimple_val (op0))
3505         cost0 = 0;
3506       else
3507         cost0 = force_var_cost (data, op0, NULL);
3508
3509       if (is_gimple_val (op1))
3510         cost1 = 0;
3511       else
3512         cost1 = force_var_cost (data, op1, NULL);
3513
3514       break;
3515
3516     default:
3517       /* Just an arbitrary value, FIXME.  */
3518       return target_spill_cost;
3519     }
3520
3521   mode = TYPE_MODE (TREE_TYPE (expr));
3522   switch (TREE_CODE (expr))
3523     {
3524     case PLUS_EXPR:
3525     case MINUS_EXPR:
3526       cost = add_cost (mode);
3527       break;
3528
3529     case MULT_EXPR:
3530       if (cst_and_fits_in_hwi (op0))
3531         cost = multiply_by_cost (int_cst_value (op0), mode);
3532       else if (cst_and_fits_in_hwi (op1))
3533         cost = multiply_by_cost (int_cst_value (op1), mode);
3534       else
3535         return target_spill_cost;
3536       break;
3537
3538     default:
3539       gcc_unreachable ();
3540     }
3541
3542   cost += cost0;
3543   cost += cost1;
3544
3545   /* Bound the cost by target_spill_cost.  The parts of complicated
3546      computations often are either loop invariant or at least can
3547      be shared between several iv uses, so letting this grow without
3548      limits would not give reasonable results.  */
3549   return cost < target_spill_cost ? cost : target_spill_cost;
3550 }
3551
3552 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3553    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3554    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3555    invariants the computation depends on.  */
3556
3557 static unsigned
3558 split_address_cost (struct ivopts_data *data,
3559                     tree addr, bool *symbol_present, bool *var_present,
3560                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3561 {
3562   tree core;
3563   HOST_WIDE_INT bitsize;
3564   HOST_WIDE_INT bitpos;
3565   tree toffset;
3566   enum machine_mode mode;
3567   int unsignedp, volatilep;
3568
3569   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3570                               &unsignedp, &volatilep, false);
3571
3572   if (toffset != 0
3573       || bitpos % BITS_PER_UNIT != 0
3574       || TREE_CODE (core) != VAR_DECL)
3575     {
3576       *symbol_present = false;
3577       *var_present = true;
3578       fd_ivopts_data = data;
3579       walk_tree (&addr, find_depends, depends_on, NULL);
3580       return target_spill_cost;
3581     }
3582
3583   *offset += bitpos / BITS_PER_UNIT;
3584   if (TREE_STATIC (core)
3585       || DECL_EXTERNAL (core))
3586     {
3587       *symbol_present = true;
3588       *var_present = false;
3589       return 0;
3590     }
3591
3592   *symbol_present = false;
3593   *var_present = true;
3594   return 0;
3595 }
3596
3597 /* Estimates cost of expressing difference of addresses E1 - E2 as
3598    var + symbol + offset.  The value of offset is added to OFFSET,
3599    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3600    part is missing.  DEPENDS_ON is a set of the invariants the computation
3601    depends on.  */
3602
3603 static unsigned
3604 ptr_difference_cost (struct ivopts_data *data,
3605                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3606                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3607 {
3608   HOST_WIDE_INT diff = 0;
3609   unsigned cost;
3610
3611   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3612
3613   if (ptr_difference_const (e1, e2, &diff))
3614     {
3615       *offset += diff;
3616       *symbol_present = false;
3617       *var_present = false;
3618       return 0;
3619     }
3620
3621   if (e2 == integer_zero_node)
3622     return split_address_cost (data, TREE_OPERAND (e1, 0),
3623                                symbol_present, var_present, offset, depends_on);
3624
3625   *symbol_present = false;
3626   *var_present = true;
3627
3628   cost = force_var_cost (data, e1, depends_on);
3629   cost += force_var_cost (data, e2, depends_on);
3630   cost += add_cost (Pmode);
3631
3632   return cost;
3633 }
3634
3635 /* Estimates cost of expressing difference E1 - E2 as
3636    var + symbol + offset.  The value of offset is added to OFFSET,
3637    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3638    part is missing.  DEPENDS_ON is a set of the invariants the computation
3639    depends on.  */
3640
3641 static unsigned
3642 difference_cost (struct ivopts_data *data,
3643                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3644                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3645 {
3646   unsigned cost;
3647   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3648   unsigned HOST_WIDE_INT off1, off2;
3649
3650   e1 = strip_offset (e1, &off1);
3651   e2 = strip_offset (e2, &off2);
3652   *offset += off1 - off2;
3653
3654   STRIP_NOPS (e1);
3655   STRIP_NOPS (e2);
3656
3657   if (TREE_CODE (e1) == ADDR_EXPR)
3658     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3659                                 depends_on);
3660   *symbol_present = false;
3661
3662   if (operand_equal_p (e1, e2, 0))
3663     {
3664       *var_present = false;
3665       return 0;
3666     }
3667   *var_present = true;
3668   if (zero_p (e2))
3669     return force_var_cost (data, e1, depends_on);
3670
3671   if (zero_p (e1))
3672     {
3673       cost = force_var_cost (data, e2, depends_on);
3674       cost += multiply_by_cost (-1, mode);
3675
3676       return cost;
3677     }
3678
3679   cost = force_var_cost (data, e1, depends_on);
3680   cost += force_var_cost (data, e2, depends_on);
3681   cost += add_cost (mode);
3682
3683   return cost;
3684 }
3685
3686 /* Determines the cost of the computation by that USE is expressed
3687    from induction variable CAND.  If ADDRESS_P is true, we just need
3688    to create an address from it, otherwise we want to get it into
3689    register.  A set of invariants we depend on is stored in
3690    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3691
3692 static unsigned
3693 get_computation_cost_at (struct ivopts_data *data,
3694                          struct iv_use *use, struct iv_cand *cand,
3695                          bool address_p, bitmap *depends_on, tree at)
3696 {
3697   tree ubase = use->iv->base, ustep = use->iv->step;
3698   tree cbase, cstep;
3699   tree utype = TREE_TYPE (ubase), ctype;
3700   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3701   HOST_WIDE_INT ratio, aratio;
3702   bool var_present, symbol_present;
3703   unsigned cost = 0, n_sums;
3704
3705   *depends_on = NULL;
3706
3707   /* Only consider real candidates.  */
3708   if (!cand->iv)
3709     return INFTY;
3710
3711   cbase = cand->iv->base;
3712   cstep = cand->iv->step;
3713   ctype = TREE_TYPE (cbase);
3714
3715   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3716     {
3717       /* We do not have a precision to express the values of use.  */
3718       return INFTY;
3719     }
3720
3721   if (address_p)
3722     {
3723       /* Do not try to express address of an object with computation based
3724          on address of a different object.  This may cause problems in rtl
3725          level alias analysis (that does not expect this to be happening,
3726          as this is illegal in C), and would be unlikely to be useful
3727          anyway.  */
3728       if (use->iv->base_object
3729           && cand->iv->base_object
3730           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3731         return INFTY;
3732     }
3733
3734   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3735     {
3736       /* TODO -- add direct handling of this case.  */
3737       goto fallback;
3738     }
3739
3740   /* CSTEPI is removed from the offset in case statement is after the
3741      increment.  If the step is not constant, we use zero instead.
3742      This is a bit imprecise (there is the extra addition), but
3743      redundancy elimination is likely to transform the code so that
3744      it uses value of the variable before increment anyway,
3745      so it is not that much unrealistic.  */
3746   if (cst_and_fits_in_hwi (cstep))
3747     cstepi = int_cst_value (cstep);
3748   else
3749     cstepi = 0;
3750
3751   if (cst_and_fits_in_hwi (ustep)
3752       && cst_and_fits_in_hwi (cstep))
3753     {
3754       ustepi = int_cst_value (ustep);
3755
3756       if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3757         return INFTY;
3758     }
3759   else
3760     {
3761       tree rat;
3762
3763       rat = constant_multiple_of (utype, ustep, cstep);
3764
3765       if (!rat)
3766         return INFTY;
3767
3768       if (cst_and_fits_in_hwi (rat))
3769         ratio = int_cst_value (rat);
3770       else if (integer_onep (rat))
3771         ratio = 1;
3772       else if (integer_all_onesp (rat))
3773         ratio = -1;
3774       else
3775         return INFTY;
3776     }
3777
3778   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3779      or ratio == 1, it is better to handle this like
3780
3781      ubase - ratio * cbase + ratio * var
3782
3783      (also holds in the case ratio == -1, TODO.  */
3784
3785   if (cst_and_fits_in_hwi (cbase))
3786     {
3787       offset = - ratio * int_cst_value (cbase);
3788       cost += difference_cost (data,
3789                                ubase, integer_zero_node,
3790                                &symbol_present, &var_present, &offset,
3791                                depends_on);
3792     }
3793   else if (ratio == 1)
3794     {
3795       cost += difference_cost (data,
3796                                ubase, cbase,
3797                                &symbol_present, &var_present, &offset,
3798                                depends_on);
3799     }
3800   else
3801     {
3802       cost += force_var_cost (data, cbase, depends_on);
3803       cost += add_cost (TYPE_MODE (ctype));
3804       cost += difference_cost (data,
3805                                ubase, integer_zero_node,
3806                                &symbol_present, &var_present, &offset,
3807                                depends_on);
3808     }
3809
3810   /* If we are after the increment, the value of the candidate is higher by
3811      one iteration.  */
3812   if (stmt_after_increment (data->current_loop, cand, at))
3813     offset -= ratio * cstepi;
3814
3815   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3816      (symbol/var/const parts may be omitted).  If we are looking for an address,
3817      find the cost of addressing this.  */
3818   if (address_p)
3819     return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3820
3821   /* Otherwise estimate the costs for computing the expression.  */
3822   aratio = ratio > 0 ? ratio : -ratio;
3823   if (!symbol_present && !var_present && !offset)
3824     {
3825       if (ratio != 1)
3826         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3827
3828       return cost;
3829     }
3830
3831   if (aratio != 1)
3832     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3833
3834   n_sums = 1;
3835   if (var_present
3836       /* Symbol + offset should be compile-time computable.  */
3837       && (symbol_present || offset))
3838     n_sums++;
3839
3840   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3841
3842 fallback:
3843   {
3844     /* Just get the expression, expand it and measure the cost.  */
3845     tree comp = get_computation_at (data->current_loop, use, cand, at);
3846
3847     if (!comp)
3848       return INFTY;
3849
3850     if (address_p)
3851       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3852
3853     return computation_cost (comp);
3854   }
3855 }
3856
3857 /* Determines the cost of the computation by that USE is expressed
3858    from induction variable CAND.  If ADDRESS_P is true, we just need
3859    to create an address from it, otherwise we want to get it into
3860    register.  A set of invariants we depend on is stored in
3861    DEPENDS_ON.  */
3862
3863 static unsigned
3864 get_computation_cost (struct ivopts_data *data,
3865                       struct iv_use *use, struct iv_cand *cand,
3866                       bool address_p, bitmap *depends_on)
3867 {
3868   return get_computation_cost_at (data,
3869                                   use, cand, address_p, depends_on, use->stmt);
3870 }
3871
3872 /* Determines cost of basing replacement of USE on CAND in a generic
3873    expression.  */
3874
3875 static bool
3876 determine_use_iv_cost_generic (struct ivopts_data *data,
3877                                struct iv_use *use, struct iv_cand *cand)
3878 {
3879   bitmap depends_on;
3880   unsigned cost;
3881
3882   /* The simple case first -- if we need to express value of the preserved
3883      original biv, the cost is 0.  This also prevents us from counting the
3884      cost of increment twice -- once at this use and once in the cost of
3885      the candidate.  */
3886   if (cand->pos == IP_ORIGINAL
3887       && cand->incremented_at == use->stmt)
3888     {
3889       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3890       return true;
3891     }
3892
3893   cost = get_computation_cost (data, use, cand, false, &depends_on);
3894   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3895
3896   return cost != INFTY;
3897 }
3898
3899 /* Determines cost of basing replacement of USE on CAND in an address.  */
3900
3901 static bool
3902 determine_use_iv_cost_address (struct ivopts_data *data,
3903                                struct iv_use *use, struct iv_cand *cand)
3904 {
3905   bitmap depends_on;
3906   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3907
3908   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3909
3910   return cost != INFTY;
3911 }
3912
3913 /* Computes value of induction variable IV in iteration NITER.  */
3914
3915 static tree
3916 iv_value (struct iv *iv, tree niter)
3917 {
3918   tree val;
3919   tree type = TREE_TYPE (iv->base);
3920
3921   niter = fold_convert (type, niter);
3922   val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3923
3924   return fold_build2 (PLUS_EXPR, type, iv->base, val);
3925 }
3926
3927 /* Computes value of candidate CAND at position AT in iteration NITER.  */
3928
3929 static tree
3930 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3931 {
3932   tree val = iv_value (cand->iv, niter);
3933   tree type = TREE_TYPE (cand->iv->base);
3934
3935   if (stmt_after_increment (loop, cand, at))
3936     val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3937
3938   return val;
3939 }
3940
3941 /* Returns period of induction variable iv.  */
3942
3943 static tree
3944 iv_period (struct iv *iv)
3945 {
3946   tree step = iv->step, period, type;
3947   tree pow2div;
3948
3949   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3950
3951   /* Period of the iv is gcd (step, type range).  Since type range is power
3952      of two, it suffices to determine the maximum power of two that divides
3953      step.  */
3954   pow2div = num_ending_zeros (step);
3955   type = unsigned_type_for (TREE_TYPE (step));
3956
3957   period = build_low_bits_mask (type,
3958                                 (TYPE_PRECISION (type)
3959                                  - tree_low_cst (pow2div, 1)));
3960
3961   return period;
3962 }
3963
3964 /* Returns the comparison operator used when eliminating the iv USE.  */
3965
3966 static enum tree_code
3967 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3968 {
3969   struct loop *loop = data->current_loop;
3970   basic_block ex_bb;
3971   edge exit;
3972
3973   ex_bb = bb_for_stmt (use->stmt);
3974   exit = EDGE_SUCC (ex_bb, 0);
3975   if (flow_bb_inside_loop_p (loop, exit->dest))
3976     exit = EDGE_SUCC (ex_bb, 1);
3977
3978   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3979 }
3980
3981 /* Check whether it is possible to express the condition in USE by comparison
3982    of candidate CAND.  If so, store the value compared with to BOUND.  */
3983
3984 static bool
3985 may_eliminate_iv (struct ivopts_data *data,
3986                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3987 {
3988   basic_block ex_bb;
3989   edge exit;
3990   struct tree_niter_desc *niter;
3991   tree nit, nit_type;
3992   tree wider_type, period, per_type;
3993   struct loop *loop = data->current_loop;
3994
3995   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3996     return false;
3997
3998   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3999      for other conditions inside loop body.  */
4000   ex_bb = bb_for_stmt (use->stmt);
4001   if (use->stmt != last_stmt (ex_bb)
4002       || TREE_CODE (use->stmt) != COND_EXPR)
4003     return false;
4004   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4005     return false;
4006
4007   exit = EDGE_SUCC (ex_bb, 0);
4008   if (flow_bb_inside_loop_p (loop, exit->dest))
4009     exit = EDGE_SUCC (ex_bb, 1);
4010   if (flow_bb_inside_loop_p (loop, exit->dest))
4011     return false;
4012
4013   niter = niter_for_exit (data, exit);
4014   if (!niter
4015       || !zero_p (niter->may_be_zero))
4016     return false;
4017
4018   nit = niter->niter;
4019   nit_type = TREE_TYPE (nit);
4020
4021   /* Determine whether we may use the variable to test whether niter iterations
4022      elapsed.  This is the case iff the period of the induction variable is
4023      greater than the number of iterations.  */
4024   period = iv_period (cand->iv);
4025   if (!period)
4026     return false;
4027   per_type = TREE_TYPE (period);
4028
4029   wider_type = TREE_TYPE (period);
4030   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4031     wider_type = per_type;
4032   else
4033     wider_type = nit_type;
4034
4035   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4036                                       fold_convert (wider_type, period),
4037                                       fold_convert (wider_type, nit))))
4038     return false;
4039
4040   *bound = cand_value_at (loop, cand, use->stmt, nit);
4041   return true;
4042 }
4043
4044 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4045
4046 static bool
4047 determine_use_iv_cost_condition (struct ivopts_data *data,
4048                                  struct iv_use *use, struct iv_cand *cand)
4049 {
4050   tree bound = NULL_TREE, op, cond;
4051   bitmap depends_on = NULL;
4052   unsigned cost;
4053
4054   /* Only consider real candidates.  */
4055   if (!cand->iv)
4056     {
4057       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4058       return false;
4059     }
4060
4061   if (may_eliminate_iv (data, use, cand, &bound))
4062     {
4063       cost = force_var_cost (data, bound, &depends_on);
4064
4065       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4066       return cost != INFTY;
4067     }
4068
4069   /* The induction variable elimination failed; just express the original
4070      giv.  If it is compared with an invariant, note that we cannot get
4071      rid of it.  */
4072   cost = get_computation_cost (data, use, cand, false, &depends_on);
4073
4074   cond = *use->op_p;
4075   if (TREE_CODE (cond) != SSA_NAME)
4076     {
4077       op = TREE_OPERAND (cond, 0);
4078       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4079         op = TREE_OPERAND (cond, 1);
4080       if (TREE_CODE (op) == SSA_NAME)
4081         {
4082           op = get_iv (data, op)->base;
4083           fd_ivopts_data = data;
4084           walk_tree (&op, find_depends, &depends_on, NULL);
4085         }
4086     }
4087
4088   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4089   return cost != INFTY;
4090 }
4091
4092 /* Checks whether it is possible to replace the final value of USE by
4093    a direct computation.  If so, the formula is stored to *VALUE.  */
4094
4095 static bool
4096 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4097                          tree *value)
4098 {
4099   struct loop *loop = data->current_loop;
4100   edge exit;
4101   struct tree_niter_desc *niter;
4102
4103   exit = single_dom_exit (loop);
4104   if (!exit)
4105     return false;
4106
4107   gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4108                               bb_for_stmt (use->stmt)));
4109
4110   niter = niter_for_single_dom_exit (data);
4111   if (!niter
4112       || !zero_p (niter->may_be_zero))
4113     return false;
4114
4115   *value = iv_value (use->iv, niter->niter);
4116
4117   return true;
4118 }
4119
4120 /* Determines cost of replacing final value of USE using CAND.  */
4121
4122 static bool
4123 determine_use_iv_cost_outer (struct ivopts_data *data,
4124                              struct iv_use *use, struct iv_cand *cand)
4125 {
4126   bitmap depends_on;
4127   unsigned cost;
4128   edge exit;
4129   tree value = NULL_TREE;
4130   struct loop *loop = data->current_loop;
4131
4132   /* The simple case first -- if we need to express value of the preserved
4133      original biv, the cost is 0.  This also prevents us from counting the
4134      cost of increment twice -- once at this use and once in the cost of
4135      the candidate.  */
4136   if (cand->pos == IP_ORIGINAL
4137       && cand->incremented_at == use->stmt)
4138     {
4139       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4140       return true;
4141     }
4142
4143   if (!cand->iv)
4144     {
4145       if (!may_replace_final_value (data, use, &value))
4146         {
4147           set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4148           return false;
4149         }
4150
4151       depends_on = NULL;
4152       cost = force_var_cost (data, value, &depends_on);
4153
4154       cost /= AVG_LOOP_NITER (loop);
4155
4156       set_use_iv_cost (data, use, cand, cost, depends_on, value);
4157       return cost != INFTY;
4158     }
4159
4160   exit = single_dom_exit (loop);
4161   if (exit)
4162     {
4163       /* If there is just a single exit, we may use value of the candidate
4164          after we take it to determine the value of use.  */
4165       cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4166                                       last_stmt (exit->src));
4167       if (cost != INFTY)
4168         cost /= AVG_LOOP_NITER (loop);
4169     }
4170   else
4171     {
4172       /* Otherwise we just need to compute the iv.  */
4173       cost = get_computation_cost (data, use, cand, false, &depends_on);
4174     }
4175
4176   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4177
4178   return cost != INFTY;
4179 }
4180
4181 /* Determines cost of basing replacement of USE on CAND.  Returns false
4182    if USE cannot be based on CAND.  */
4183
4184 static bool
4185 determine_use_iv_cost (struct ivopts_data *data,
4186                        struct iv_use *use, struct iv_cand *cand)
4187 {
4188   switch (use->type)
4189     {
4190     case USE_NONLINEAR_EXPR:
4191       return determine_use_iv_cost_generic (data, use, cand);
4192
4193     case USE_OUTER:
4194       return determine_use_iv_cost_outer (data, use, cand);
4195
4196     case USE_ADDRESS:
4197       return determine_use_iv_cost_address (data, use, cand);
4198
4199     case USE_COMPARE:
4200       return determine_use_iv_cost_condition (data, use, cand);
4201
4202     default:
4203       gcc_unreachable ();
4204     }
4205 }
4206
4207 /* Determines costs of basing the use of the iv on an iv candidate.  */
4208
4209 static void
4210 determine_use_iv_costs (struct ivopts_data *data)
4211 {
4212   unsigned i, j;
4213   struct iv_use *use;
4214   struct iv_cand *cand;
4215   bitmap to_clear = BITMAP_ALLOC (NULL);
4216
4217   alloc_use_cost_map (data);
4218
4219   for (i = 0; i < n_iv_uses (data); i++)
4220     {
4221       use = iv_use (data, i);
4222
4223       if (data->consider_all_candidates)
4224         {
4225           for (j = 0; j < n_iv_cands (data); j++)
4226             {
4227               cand = iv_cand (data, j);
4228               determine_use_iv_cost (data, use, cand);
4229             }
4230         }
4231       else
4232         {
4233           bitmap_iterator bi;
4234
4235           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4236             {
4237               cand = iv_cand (data, j);
4238               if (!determine_use_iv_cost (data, use, cand))
4239                 bitmap_set_bit (to_clear, j);
4240             }
4241
4242           /* Remove the candidates for that the cost is infinite from
4243              the list of related candidates.  */
4244           bitmap_and_compl_into (use->related_cands, to_clear);
4245           bitmap_clear (to_clear);
4246         }
4247     }
4248
4249   BITMAP_FREE (to_clear);
4250
4251   if (dump_file && (dump_flags & TDF_DETAILS))
4252     {
4253       fprintf (dump_file, "Use-candidate costs:\n");
4254
4255       for (i = 0; i < n_iv_uses (data); i++)
4256         {
4257           use = iv_use (data, i);
4258
4259           fprintf (dump_file, "Use %d:\n", i);
4260           fprintf (dump_file, "  cand\tcost\tdepends on\n");
4261           for (j = 0; j < use->n_map_members; j++)
4262             {
4263               if (!use->cost_map[j].cand
4264                   || use->cost_map[j].cost == INFTY)
4265                 continue;
4266
4267               fprintf (dump_file, "  %d\t%d\t",
4268                        use->cost_map[j].cand->id,
4269                        use->cost_map[j].cost);
4270               if (use->cost_map[j].depends_on)
4271                 bitmap_print (dump_file,
4272                               use->cost_map[j].depends_on, "","");
4273               fprintf (dump_file, "\n");
4274             }
4275
4276           fprintf (dump_file, "\n");
4277         }
4278       fprintf (dump_file, "\n");
4279     }
4280 }
4281
4282 /* Determines cost of the candidate CAND.  */
4283
4284 static void
4285 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4286 {
4287   unsigned cost_base, cost_step;
4288   tree base;
4289
4290   if (!cand->iv)
4291     {
4292       cand->cost = 0;
4293       return;
4294     }
4295
4296   /* There are two costs associated with the candidate -- its increment
4297      and its initialization.  The second is almost negligible for any loop
4298      that rolls enough, so we take it just very little into account.  */
4299
4300   base = cand->iv->base;
4301   cost_base = force_var_cost (data, base, NULL);
4302   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4303
4304   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4305
4306   /* Prefer the original iv unless we may gain something by replacing it;
4307      this is not really relevant for artificial ivs created by other
4308      passes.  */
4309   if (cand->pos == IP_ORIGINAL
4310       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4311     cand->cost--;
4312
4313   /* Prefer not to insert statements into latch unless there are some
4314      already (so that we do not create unnecessary jumps).  */
4315   if (cand->pos == IP_END
4316       && empty_block_p (ip_end_pos (data->current_loop)))
4317     cand->cost++;
4318 }
4319
4320 /* Determines costs of computation of the candidates.  */
4321
4322 static void
4323 determine_iv_costs (struct ivopts_data *data)
4324 {
4325   unsigned i;
4326
4327   if (dump_file && (dump_flags & TDF_DETAILS))
4328     {
4329       fprintf (dump_file, "Candidate costs:\n");
4330       fprintf (dump_file, "  cand\tcost\n");
4331     }
4332
4333   for (i = 0; i < n_iv_cands (data); i++)
4334     {
4335       struct iv_cand *cand = iv_cand (data, i);
4336
4337       determine_iv_cost (data, cand);
4338
4339       if (dump_file && (dump_flags & TDF_DETAILS))
4340         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4341     }
4342
4343 if (dump_file && (dump_flags & TDF_DETAILS))
4344       fprintf (dump_file, "\n");
4345 }
4346
4347 /* Calculates cost for having SIZE induction variables.  */
4348
4349 static unsigned
4350 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4351 {
4352   return global_cost_for_size (size,
4353                                loop_data (data->current_loop)->regs_used,
4354                                n_iv_uses (data));
4355 }
4356
4357 /* For each size of the induction variable set determine the penalty.  */
4358
4359 static void
4360 determine_set_costs (struct ivopts_data *data)
4361 {
4362   unsigned j, n;
4363   tree phi, op;
4364   struct loop *loop = data->current_loop;
4365   bitmap_iterator bi;
4366
4367   /* We use the following model (definitely improvable, especially the
4368      cost function -- TODO):
4369
4370      We estimate the number of registers available (using MD data), name it A.
4371
4372      We estimate the number of registers used by the loop, name it U.  This
4373      number is obtained as the number of loop phi nodes (not counting virtual
4374      registers and bivs) + the number of variables from outside of the loop.
4375
4376      We set a reserve R (free regs that are used for temporary computations,
4377      etc.).  For now the reserve is a constant 3.
4378
4379      Let I be the number of induction variables.
4380
4381      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4382         make a lot of ivs without a reason).
4383      -- if A - R < U + I <= A, the cost is I * PRES_COST
4384      -- if U + I > A, the cost is I * PRES_COST and
4385         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4386
4387   if (dump_file && (dump_flags & TDF_DETAILS))
4388     {
4389       fprintf (dump_file, "Global costs:\n");
4390       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4391       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4392       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4393       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4394     }
4395
4396   n = 0;
4397   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4398     {
4399       op = PHI_RESULT (phi);
4400
4401       if (!is_gimple_reg (op))
4402         continue;
4403
4404       if (get_iv (data, op))
4405         continue;
4406
4407       n++;
4408     }
4409
4410   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4411     {
4412       struct version_info *info = ver_info (data, j);
4413
4414       if (info->inv_id && info->has_nonlin_use)
4415         n++;
4416     }
4417
4418   loop_data (loop)->regs_used = n;
4419   if (dump_file && (dump_flags & TDF_DETAILS))
4420     fprintf (dump_file, "  regs_used %d\n", n);
4421
4422   if (dump_file && (dump_flags & TDF_DETAILS))
4423     {
4424       fprintf (dump_file, "  cost for size:\n");
4425       fprintf (dump_file, "  ivs\tcost\n");
4426       for (j = 0; j <= 2 * target_avail_regs; j++)
4427         fprintf (dump_file, "  %d\t%d\n", j,
4428                  ivopts_global_cost_for_size (data, j));
4429       fprintf (dump_file, "\n");
4430     }
4431 }
4432
4433 /* Returns true if A is a cheaper cost pair than B.  */
4434
4435 static bool
4436 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4437 {
4438   if (!a)
4439     return false;
4440
4441   if (!b)
4442     return true;
4443
4444   if (a->cost < b->cost)
4445     return true;
4446
4447   if (a->cost > b->cost)
4448     return false;
4449
4450   /* In case the costs are the same, prefer the cheaper candidate.  */
4451   if (a->cand->cost < b->cand->cost)
4452     return true;
4453
4454   return false;
4455 }
4456
4457 /* Computes the cost field of IVS structure.  */
4458
4459 static void
4460 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4461 {
4462   unsigned cost = 0;
4463
4464   cost += ivs->cand_use_cost;
4465   cost += ivs->cand_cost;
4466   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4467
4468   ivs->cost = cost;
4469 }
4470
4471 /* Remove invariants in set INVS to set IVS.  */
4472
4473 static void
4474 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4475 {
4476   bitmap_iterator bi;
4477   unsigned iid;
4478
4479   if (!invs)
4480     return;
4481
4482   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4483     {
4484       ivs->n_invariant_uses[iid]--;
4485       if (ivs->n_invariant_uses[iid] == 0)
4486         ivs->n_regs--;
4487     }
4488 }
4489
4490 /* Set USE not to be expressed by any candidate in IVS.  */
4491
4492 static void
4493 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4494                  struct iv_use *use)
4495 {
4496   unsigned uid = use->id, cid;
4497   struct cost_pair *cp;
4498
4499   cp = ivs->cand_for_use[uid];
4500   if (!cp)
4501     return;
4502   cid = cp->cand->id;
4503
4504   ivs->bad_uses++;
4505   ivs->cand_for_use[uid] = NULL;
4506   ivs->n_cand_uses[cid]--;
4507
4508   if (ivs->n_cand_uses[cid] == 0)
4509     {
4510       bitmap_clear_bit (ivs->cands, cid);
4511       /* Do not count the pseudocandidates.  */
4512       if (cp->cand->iv)
4513         ivs->n_regs--;
4514       ivs->n_cands--;
4515       ivs->cand_cost -= cp->cand->cost;
4516
4517       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4518     }
4519
4520   ivs->cand_use_cost -= cp->cost;
4521
4522   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4523   iv_ca_recount_cost (data, ivs);
4524 }
4525
4526 /* Add invariants in set INVS to set IVS.  */
4527
4528 static void
4529 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4530 {
4531   bitmap_iterator bi;
4532   unsigned iid;
4533
4534   if (!invs)
4535     return;
4536
4537   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4538     {
4539       ivs->n_invariant_uses[iid]++;
4540       if (ivs->n_invariant_uses[iid] == 1)
4541         ivs->n_regs++;
4542     }
4543 }
4544
4545 /* Set cost pair for USE in set IVS to CP.  */
4546
4547 static void
4548 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4549               struct iv_use *use, struct cost_pair *cp)
4550 {
4551   unsigned uid = use->id, cid;
4552
4553   if (ivs->cand_for_use[uid] == cp)
4554     return;
4555
4556   if (ivs->cand_for_use[uid])
4557     iv_ca_set_no_cp (data, ivs, use);
4558
4559   if (cp)
4560     {
4561       cid = cp->cand->id;
4562
4563       ivs->bad_uses--;
4564       ivs->cand_for_use[uid] = cp;
4565       ivs->n_cand_uses[cid]++;
4566       if (ivs->n_cand_uses[cid] == 1)
4567         {
4568           bitmap_set_bit (ivs->cands, cid);
4569           /* Do not count the pseudocandidates.  */
4570           if (cp->cand->iv)
4571             ivs->n_regs++;
4572           ivs->n_cands++;
4573           ivs->cand_cost += cp->cand->cost;
4574
4575           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4576         }
4577
4578       ivs->cand_use_cost += cp->cost;
4579       iv_ca_set_add_invariants (ivs, cp->depends_on);
4580       iv_ca_recount_cost (data, ivs);
4581     }
4582 }
4583
4584 /* Extend set IVS by expressing USE by some of the candidates in it
4585    if possible.  */
4586
4587 static void
4588 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4589                struct iv_use *use)
4590 {
4591   struct cost_pair *best_cp = NULL, *cp;
4592   bitmap_iterator bi;
4593   unsigned i;
4594
4595   gcc_assert (ivs->upto >= use->id);
4596
4597   if (ivs->upto == use->id)
4598     {
4599       ivs->upto++;
4600       ivs->bad_uses++;
4601     }
4602
4603   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4604     {
4605       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4606
4607       if (cheaper_cost_pair (cp, best_cp))
4608         best_cp = cp;
4609     }
4610
4611   iv_ca_set_cp (data, ivs, use, best_cp);
4612 }
4613
4614 /* Get cost for assignment IVS.  */
4615
4616 static unsigned
4617 iv_ca_cost (struct iv_ca *ivs)
4618 {
4619   return (ivs->bad_uses ? INFTY : ivs->cost);
4620 }
4621
4622 /* Returns true if all dependences of CP are among invariants in IVS.  */
4623
4624 static bool
4625 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4626 {
4627   unsigned i;
4628   bitmap_iterator bi;
4629
4630   if (!cp->depends_on)
4631     return true;
4632
4633   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4634     {
4635       if (ivs->n_invariant_uses[i] == 0)
4636         return false;
4637     }
4638
4639   return true;
4640 }
4641
4642 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4643    it before NEXT_CHANGE.  */
4644
4645 static struct iv_ca_delta *
4646 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4647                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4648 {
4649   struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4650
4651   change->use = use;
4652   change->old_cp = old_cp;
4653   change->new_cp = new_cp;
4654   change->next_change = next_change;
4655
4656   return change;
4657 }
4658
4659 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4660    are rewritten.  */
4661
4662 static struct iv_ca_delta *
4663 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4664 {
4665   struct iv_ca_delta *last;
4666
4667   if (!l2)
4668     return l1;
4669
4670   if (!l1)
4671     return l2;
4672
4673   for (last = l1; last->next_change; last = last->next_change)
4674     continue;
4675   last->next_change = l2;
4676
4677   return l1;
4678 }
4679
4680 /* Returns candidate by that USE is expressed in IVS.  */
4681
4682 static struct cost_pair *
4683 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4684 {
4685   return ivs->cand_for_use[use->id];
4686 }
4687
4688 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4689
4690 static struct iv_ca_delta *
4691 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4692 {
4693   struct iv_ca_delta *act, *next, *prev = NULL;
4694   struct cost_pair *tmp;
4695
4696   for (act = delta; act; act = next)
4697     {
4698       next = act->next_change;
4699       act->next_change = prev;
4700       prev = act;
4701
4702       tmp = act->old_cp;
4703       act->old_cp = act->new_cp;
4704       act->new_cp = tmp;
4705     }
4706
4707   return prev;
4708 }
4709
4710 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4711    reverted instead.  */
4712
4713 static void
4714 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4715                     struct iv_ca_delta *delta, bool forward)
4716 {
4717   struct cost_pair *from, *to;
4718   struct iv_ca_delta *act;
4719
4720   if (!forward)
4721     delta = iv_ca_delta_reverse (delta);
4722
4723   for (act = delta; act; act = act->next_change)
4724     {
4725       from = act->old_cp;
4726       to = act->new_cp;
4727       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4728       iv_ca_set_cp (data, ivs, act->use, to);
4729     }
4730
4731   if (!forward)
4732     iv_ca_delta_reverse (delta);
4733 }
4734
4735 /* Returns true if CAND is used in IVS.  */
4736
4737 static bool
4738 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4739 {
4740   return ivs->n_cand_uses[cand->id] > 0;
4741 }
4742
4743 /* Returns number of induction variable candidates in the set IVS.  */
4744
4745 static unsigned
4746 iv_ca_n_cands (struct iv_ca *ivs)
4747 {
4748   return ivs->n_cands;
4749 }
4750
4751 /* Free the list of changes DELTA.  */
4752
4753 static void
4754 iv_ca_delta_free (struct iv_ca_delta **delta)
4755 {
4756   struct iv_ca_delta *act, *next;
4757
4758   for (act = *delta; act; act = next)
4759     {
4760       next = act->next_change;
4761       free (act);
4762     }
4763
4764   *delta = NULL;
4765 }
4766
4767 /* Allocates new iv candidates assignment.  */
4768
4769 static struct iv_ca *
4770 iv_ca_new (struct ivopts_data *data)
4771 {
4772   struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4773
4774   nw->upto = 0;
4775   nw->bad_uses = 0;
4776   nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4777   nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4778   nw->cands = BITMAP_ALLOC (NULL);
4779   nw->n_cands = 0;
4780   nw->n_regs = 0;
4781   nw->cand_use_cost = 0;
4782   nw->cand_cost = 0;
4783   nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4784   nw->cost = 0;
4785
4786   return nw;
4787 }
4788
4789 /* Free memory occupied by the set IVS.  */
4790
4791 static void
4792 iv_ca_free (struct iv_ca **ivs)
4793 {
4794   free ((*ivs)->cand_for_use);
4795   free ((*ivs)->n_cand_uses);
4796   BITMAP_FREE ((*ivs)->cands);
4797   free ((*ivs)->n_invariant_uses);
4798   free (*ivs);
4799   *ivs = NULL;
4800 }
4801
4802 /* Dumps IVS to FILE.  */
4803
4804 static void
4805 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4806 {
4807   const char *pref = "  invariants ";
4808   unsigned i;
4809
4810   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4811   bitmap_print (file, ivs->cands, "  candidates ","\n");
4812
4813   for (i = 1; i <= data->max_inv_id; i++)
4814     if (ivs->n_invariant_uses[i])
4815       {
4816         fprintf (file, "%s%d", pref, i);
4817         pref = ", ";
4818       }
4819   fprintf (file, "\n");
4820 }
4821
4822 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4823    new set, and store differences in DELTA.  Number of induction variables
4824    in the new set is stored to N_IVS.  */
4825
4826 static unsigned
4827 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4828               struct iv_cand *cand, struct iv_ca_delta **delta,
4829               unsigned *n_ivs)
4830 {
4831   unsigned i, cost;
4832   struct iv_use *use;
4833   struct cost_pair *old_cp, *new_cp;
4834
4835   *delta = NULL;
4836   for (i = 0; i < ivs->upto; i++)
4837     {
4838       use = iv_use (data, i);
4839       old_cp = iv_ca_cand_for_use (ivs, use);
4840
4841       if (old_cp
4842           && old_cp->cand == cand)
4843         continue;
4844
4845       new_cp = get_use_iv_cost (data, use, cand);
4846       if (!new_cp)
4847         continue;
4848
4849       if (!iv_ca_has_deps (ivs, new_cp))
4850         continue;
4851
4852       if (!cheaper_cost_pair (new_cp, old_cp))
4853         continue;
4854
4855       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4856     }
4857
4858   iv_ca_delta_commit (data, ivs, *delta, true);
4859   cost = iv_ca_cost (ivs);
4860   if (n_ivs)
4861     *n_ivs = iv_ca_n_cands (ivs);
4862   iv_ca_delta_commit (data, ivs, *delta, false);
4863
4864   return cost;
4865 }
4866
4867 /* Try narrowing set IVS by removing CAND.  Return the cost of
4868    the new set and store the differences in DELTA.  */
4869
4870 static unsigned
4871 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4872               struct iv_cand *cand, struct iv_ca_delta **delta)
4873 {
4874   unsigned i, ci;
4875   struct iv_use *use;
4876   struct cost_pair *old_cp, *new_cp, *cp;
4877   bitmap_iterator bi;
4878   struct iv_cand *cnd;
4879   unsigned cost;
4880
4881   *delta = NULL;
4882   for (i = 0; i < n_iv_uses (data); i++)
4883     {
4884       use = iv_use (data, i);
4885
4886       old_cp = iv_ca_cand_for_use (ivs, use);
4887       if (old_cp->cand != cand)
4888         continue;
4889
4890       new_cp = NULL;
4891
4892       if (data->consider_all_candidates)
4893         {
4894           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4895             {
4896               if (ci == cand->id)
4897                 continue;
4898
4899               cnd = iv_cand (data, ci);
4900
4901               cp = get_use_iv_cost (data, use, cnd);
4902               if (!cp)
4903                 continue;
4904               if (!iv_ca_has_deps (ivs, cp))
4905                 continue;
4906
4907               if (!cheaper_cost_pair (cp, new_cp))
4908                 continue;
4909
4910               new_cp = cp;
4911             }
4912         }
4913       else
4914         {
4915           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4916             {
4917               if (ci == cand->id)
4918                 continue;
4919
4920               cnd = iv_cand (data, ci);
4921
4922               cp = get_use_iv_cost (data, use, cnd);
4923               if (!cp)
4924                 continue;
4925               if (!iv_ca_has_deps (ivs, cp))
4926                 continue;
4927
4928               if (!cheaper_cost_pair (cp, new_cp))
4929                 continue;
4930
4931               new_cp = cp;
4932             }
4933         }
4934
4935       if (!new_cp)
4936         {
4937           iv_ca_delta_free (delta);
4938           return INFTY;
4939         }
4940
4941       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4942     }
4943
4944   iv_ca_delta_commit (data, ivs, *delta, true);
4945   cost = iv_ca_cost (ivs);
4946   iv_ca_delta_commit (data, ivs, *delta, false);
4947
4948   return cost;
4949 }
4950
4951 /* Try optimizing the set of candidates IVS by removing candidates different
4952    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4953    differences in DELTA.  */
4954
4955 static unsigned
4956 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4957              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4958 {
4959   bitmap_iterator bi;
4960   struct iv_ca_delta *act_delta, *best_delta;
4961   unsigned i, best_cost, acost;
4962   struct iv_cand *cand;
4963
4964   best_delta = NULL;
4965   best_cost = iv_ca_cost (ivs);
4966
4967   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4968     {
4969       cand = iv_cand (data, i);
4970
4971       if (cand == except_cand)
4972         continue;
4973
4974       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4975
4976       if (acost < best_cost)
4977         {
4978           best_cost = acost;
4979           iv_ca_delta_free (&best_delta);
4980           best_delta = act_delta;
4981         }
4982       else
4983         iv_ca_delta_free (&act_delta);
4984     }
4985
4986   if (!best_delta)
4987     {
4988       *delta = NULL;
4989       return best_cost;
4990     }
4991
4992   /* Recurse to possibly remove other unnecessary ivs.  */
4993   iv_ca_delta_commit (data, ivs, best_delta, true);
4994   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4995   iv_ca_delta_commit (data, ivs, best_delta, false);
4996   *delta = iv_ca_delta_join (best_delta, *delta);
4997   return best_cost;
4998 }
4999
5000 /* Tries to extend the sets IVS in the best possible way in order
5001    to express the USE.  */
5002
5003 static bool
5004 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5005                   struct iv_use *use)
5006 {
5007   unsigned best_cost, act_cost;
5008   unsigned i;
5009   bitmap_iterator bi;
5010   struct iv_cand *cand;
5011   struct iv_ca_delta *best_delta = NULL, *act_delta;
5012   struct cost_pair *cp;
5013
5014   iv_ca_add_use (data, ivs, use);
5015   best_cost = iv_ca_cost (ivs);
5016
5017   cp = iv_ca_cand_for_use (ivs, use);
5018   if (cp)
5019     {
5020       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5021       iv_ca_set_no_cp (data, ivs, use);
5022     }
5023
5024   /* First try important candidates.  Only if it fails, try the specific ones.
5025      Rationale -- in loops with many variables the best choice often is to use
5026      just one generic biv.  If we added here many ivs specific to the uses,
5027      the optimization algorithm later would be likely to get stuck in a local
5028      minimum, thus causing us to create too many ivs.  The approach from
5029      few ivs to more seems more likely to be successful -- starting from few
5030      ivs, replacing an expensive use by a specific iv should always be a
5031      win.  */
5032   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5033     {
5034       cand = iv_cand (data, i);
5035
5036       if (iv_ca_cand_used_p (ivs, cand))
5037         continue;
5038
5039       cp = get_use_iv_cost (data, use, cand);
5040       if (!cp)
5041         continue;
5042
5043       iv_ca_set_cp (data, ivs, use, cp);
5044       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5045       iv_ca_set_no_cp (data, ivs, use);
5046       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5047
5048       if (act_cost < best_cost)
5049         {
5050           best_cost = act_cost;
5051
5052           iv_ca_delta_free (&best_delta);
5053           best_delta = act_delta;
5054         }
5055       else
5056         iv_ca_delta_free (&act_delta);
5057     }
5058
5059   if (best_cost == INFTY)
5060     {
5061       for (i = 0; i < use->n_map_members; i++)
5062         {
5063           cp = use->cost_map + i;
5064           cand = cp->cand;
5065           if (!cand)
5066             continue;
5067
5068           /* Already tried this.  */
5069           if (cand->important)
5070             continue;
5071
5072           if (iv_ca_cand_used_p (ivs, cand))
5073             continue;
5074
5075           act_delta = NULL;
5076           iv_ca_set_cp (data, ivs, use, cp);
5077           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5078           iv_ca_set_no_cp (data, ivs, use);
5079           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5080                                        cp, act_delta);
5081
5082           if (act_cost < best_cost)
5083             {
5084               best_cost = act_cost;
5085
5086               if (best_delta)
5087                 iv_ca_delta_free (&best_delta);
5088               best_delta = act_delta;
5089             }
5090           else
5091             iv_ca_delta_free (&act_delta);
5092         }
5093     }
5094
5095   iv_ca_delta_commit (data, ivs, best_delta, true);
5096   iv_ca_delta_free (&best_delta);
5097
5098   return (best_cost != INFTY);
5099 }
5100
5101 /* Finds an initial assignment of candidates to uses.  */
5102
5103 static struct iv_ca *
5104 get_initial_solution (struct ivopts_data *data)
5105 {
5106   struct iv_ca *ivs = iv_ca_new (data);
5107   unsigned i;
5108
5109   for (i = 0; i < n_iv_uses (data); i++)
5110     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5111       {
5112         iv_ca_free (&ivs);
5113         return NULL;
5114       }
5115
5116   return ivs;
5117 }
5118
5119 /* Tries to improve set of induction variables IVS.  */
5120
5121 static bool
5122 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5123 {
5124   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5125   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5126   struct iv_cand *cand;
5127
5128   /* Try extending the set of induction variables by one.  */
5129   for (i = 0; i < n_iv_cands (data); i++)
5130     {
5131       cand = iv_cand (data, i);
5132
5133       if (iv_ca_cand_used_p (ivs, cand))
5134         continue;
5135
5136       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5137       if (!act_delta)
5138         continue;
5139
5140       /* If we successfully added the candidate and the set is small enough,
5141          try optimizing it by removing other candidates.  */
5142       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5143         {
5144           iv_ca_delta_commit (data, ivs, act_delta, true);
5145           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5146           iv_ca_delta_commit (data, ivs, act_delta, false);
5147           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5148         }
5149
5150       if (acost < best_cost)
5151         {
5152           best_cost = acost;
5153           iv_ca_delta_free (&best_delta);
5154           best_delta = act_delta;
5155         }
5156       else
5157         iv_ca_delta_free (&act_delta);
5158     }
5159
5160   if (!best_delta)
5161     {
5162       /* Try removing the candidates from the set instead.  */
5163       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5164
5165       /* Nothing more we can do.  */
5166       if (!best_delta)
5167         return false;
5168     }
5169
5170   iv_ca_delta_commit (data, ivs, best_delta, true);
5171   gcc_assert (best_cost == iv_ca_cost (ivs));
5172   iv_ca_delta_free (&best_delta);
5173   return true;
5174 }
5175
5176 /* Attempts to find the optimal set of induction variables.  We do simple
5177    greedy heuristic -- we try to replace at most one candidate in the selected
5178    solution and remove the unused ivs while this improves the cost.  */
5179
5180 static struct iv_ca *
5181 find_optimal_iv_set (struct ivopts_data *data)
5182 {
5183   unsigned i;
5184   struct iv_ca *set;
5185   struct iv_use *use;
5186
5187   /* Get the initial solution.  */
5188   set = get_initial_solution (data);
5189   if (!set)
5190     {
5191       if (dump_file && (dump_flags & TDF_DETAILS))
5192         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5193       return NULL;
5194     }
5195
5196   if (dump_file && (dump_flags & TDF_DETAILS))
5197     {
5198       fprintf (dump_file, "Initial set of candidates:\n");
5199       iv_ca_dump (data, dump_file, set);
5200     }
5201
5202   while (try_improve_iv_set (data, set))
5203     {
5204       if (dump_file && (dump_flags & TDF_DETAILS))
5205         {
5206           fprintf (dump_file, "Improved to:\n");
5207           iv_ca_dump (data, dump_file, set);
5208         }
5209     }
5210
5211   if (dump_file && (dump_flags & TDF_DETAILS))
5212     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5213
5214   for (i = 0; i < n_iv_uses (data); i++)
5215     {
5216       use = iv_use (data, i);
5217       use->selected = iv_ca_cand_for_use (set, use)->cand;
5218     }
5219
5220   return set;
5221 }
5222
5223 /* Creates a new induction variable corresponding to CAND.  */
5224
5225 static void
5226 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5227 {
5228   block_stmt_iterator incr_pos;
5229   tree base;
5230   bool after = false;
5231
5232   if (!cand->iv)
5233     return;
5234
5235   switch (cand->pos)
5236     {
5237     case IP_NORMAL:
5238       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5239       break;
5240
5241     case IP_END:
5242       incr_pos = bsi_last (ip_end_pos (data->current_loop));
5243       after = true;
5244       break;
5245
5246     case IP_ORIGINAL:
5247       /* Mark that the iv is preserved.  */
5248       name_info (data, cand->var_before)->preserve_biv = true;
5249       name_info (data, cand->var_after)->preserve_biv = true;
5250
5251       /* Rewrite the increment so that it uses var_before directly.  */
5252       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5253
5254       return;
5255     }
5256
5257   gimple_add_tmp_var (cand->var_before);
5258   add_referenced_tmp_var (cand->var_before);
5259
5260   base = unshare_expr (cand->iv->base);
5261
5262   create_iv (base, unshare_expr (cand->iv->step),
5263              cand->var_before, data->current_loop,
5264              &incr_pos, after, &cand->var_before, &cand->var_after);
5265 }
5266
5267 /* Creates new induction variables described in SET.  */
5268
5269 static void
5270 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5271 {
5272   unsigned i;
5273   struct iv_cand *cand;
5274   bitmap_iterator bi;
5275
5276   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5277     {
5278       cand = iv_cand (data, i);
5279       create_new_iv (data, cand);
5280     }
5281 }
5282
5283 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5284    is true, remove also the ssa name defined by the statement.  */
5285
5286 static void
5287 remove_statement (tree stmt, bool including_defined_name)
5288 {
5289   if (TREE_CODE (stmt) == PHI_NODE)
5290     {
5291       if (!including_defined_name)
5292         {
5293           /* Prevent the ssa name defined by the statement from being removed.  */
5294           SET_PHI_RESULT (stmt, NULL);
5295         }
5296       remove_phi_node (stmt, NULL_TREE);
5297     }
5298   else
5299     {
5300       block_stmt_iterator bsi = bsi_for_stmt (stmt);
5301
5302       bsi_remove (&bsi);
5303     }
5304 }
5305
5306 /* Rewrites USE (definition of iv used in a nonlinear expression)
5307    using candidate CAND.  */
5308
5309 static void
5310 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5311                             struct iv_use *use, struct iv_cand *cand)
5312 {
5313   tree comp;
5314   tree op, stmts, tgt, ass;
5315   block_stmt_iterator bsi, pbsi;
5316
5317   /* An important special case -- if we are asked to express value of
5318      the original iv by itself, just exit; there is no need to
5319      introduce a new computation (that might also need casting the
5320      variable to unsigned and back).  */
5321   if (cand->pos == IP_ORIGINAL
5322       && TREE_CODE (use->stmt) == MODIFY_EXPR
5323       && TREE_OPERAND (use->stmt, 0) == cand->var_after)
5324     {
5325       op = TREE_OPERAND (use->stmt, 1);
5326
5327       /* Be a bit careful.  In case variable is expressed in some
5328          complicated way, rewrite it so that we may get rid of this
5329          complicated expression.  */
5330       if ((TREE_CODE (op) == PLUS_EXPR
5331            || TREE_CODE (op) == MINUS_EXPR)
5332           && TREE_OPERAND (op, 0) == cand->var_before
5333           && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
5334         return;
5335     }
5336
5337   comp = get_computation (data->current_loop, use, cand);
5338   switch (TREE_CODE (use->stmt))
5339     {
5340     case PHI_NODE:
5341       tgt = PHI_RESULT (use->stmt);
5342
5343       /* If we should keep the biv, do not replace it.  */
5344       if (name_info (data, tgt)->preserve_biv)
5345         return;
5346
5347       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5348       while (!bsi_end_p (pbsi)
5349              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5350         {
5351           bsi = pbsi;
5352           bsi_next (&pbsi);
5353         }
5354       break;
5355
5356     case MODIFY_EXPR:
5357       tgt = TREE_OPERAND (use->stmt, 0);
5358       bsi = bsi_for_stmt (use->stmt);
5359       break;
5360
5361     default:
5362       gcc_unreachable ();
5363     }
5364
5365   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5366
5367   if (TREE_CODE (use->stmt) == PHI_NODE)
5368     {
5369       if (stmts)
5370         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5371       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5372       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5373       remove_statement (use->stmt, false);
5374       SSA_NAME_DEF_STMT (tgt) = ass;
5375     }
5376   else
5377     {
5378       if (stmts)
5379         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5380       TREE_OPERAND (use->stmt, 1) = op;
5381     }
5382 }
5383
5384 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5385    for_each_index.  */
5386
5387 static bool
5388 idx_remove_ssa_names (tree base, tree *idx,
5389                       void *data ATTRIBUTE_UNUSED)
5390 {
5391   tree *op;
5392
5393   if (TREE_CODE (*idx) == SSA_NAME)
5394     *idx = SSA_NAME_VAR (*idx);
5395
5396   if (TREE_CODE (base) == ARRAY_REF)
5397     {
5398       op = &TREE_OPERAND (base, 2);
5399       if (*op
5400           && TREE_CODE (*op) == SSA_NAME)
5401         *op = SSA_NAME_VAR (*op);
5402       op = &TREE_OPERAND (base, 3);
5403       if (*op
5404           && TREE_CODE (*op) == SSA_NAME)
5405         *op = SSA_NAME_VAR (*op);
5406     }
5407
5408   return true;
5409 }
5410
5411 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5412
5413 static tree
5414 unshare_and_remove_ssa_names (tree ref)
5415 {
5416   ref = unshare_expr (ref);
5417   for_each_index (&ref, idx_remove_ssa_names, NULL);
5418
5419   return ref;
5420 }
5421
5422 /* Extract the alias analysis info for the memory reference REF.  There are
5423    several ways how this information may be stored and what precisely is
5424    its semantics depending on the type of the reference, but there always is
5425    somewhere hidden one _DECL node that is used to determine the set of
5426    virtual operands for the reference.  The code below deciphers this jungle
5427    and extracts this single useful piece of information.  */
5428
5429 static tree
5430 get_ref_tag (tree ref)
5431 {
5432   tree var = get_base_address (ref);
5433   tree tag;
5434
5435   if (!var)
5436     return NULL_TREE;
5437
5438   if (TREE_CODE (var) == INDIRECT_REF)
5439     var = TREE_OPERAND (var, 0);
5440   if (TREE_CODE (var) == SSA_NAME)
5441     {
5442       if (SSA_NAME_PTR_INFO (var))
5443         {
5444           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5445           if (tag)
5446             return tag;
5447         }
5448
5449       var = SSA_NAME_VAR (var);
5450     }
5451
5452   if (DECL_P (var))
5453     {
5454       tag = var_ann (var)->type_mem_tag;
5455       if (tag)
5456         return tag;
5457
5458       return var;
5459     }
5460
5461   return NULL_TREE;
5462 }
5463
5464 /* Copies the reference information from OLD_REF to NEW_REF.  */
5465
5466 static void
5467 copy_ref_info (tree new_ref, tree old_ref)
5468 {
5469   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5470     copy_mem_ref_info (new_ref, old_ref);
5471   else
5472     {
5473       TMR_TAG (new_ref) = get_ref_tag (old_ref);
5474       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5475     }
5476 }
5477
5478 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5479
5480 static void
5481 rewrite_use_address (struct ivopts_data *data,
5482                      struct iv_use *use, struct iv_cand *cand)
5483 {
5484   struct affine_tree_combination aff;
5485   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5486   tree ref;
5487
5488   get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5489   unshare_aff_combination (&aff);
5490
5491   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5492   copy_ref_info (ref, *use->op_p);
5493   *use->op_p = ref;
5494 }
5495
5496 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5497    candidate CAND.  */
5498
5499 static void
5500 rewrite_use_compare (struct ivopts_data *data,
5501                      struct iv_use *use, struct iv_cand *cand)
5502 {
5503   tree comp;
5504   tree *op_p, cond, op, stmts, bound;
5505   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5506   enum tree_code compare;
5507   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5508
5509   bound = cp->value;
5510   if (bound)
5511     {
5512       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5513       tree var_type = TREE_TYPE (var);
5514
5515       compare = iv_elimination_compare (data, use);
5516       bound = fold_convert (var_type, bound);
5517       op = force_gimple_operand (unshare_expr (bound), &stmts,
5518                                  true, NULL_TREE);
5519
5520       if (stmts)
5521         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5522
5523       *use->op_p = build2 (compare, boolean_type_node, var, op);
5524       update_stmt (use->stmt);
5525       return;
5526     }
5527
5528   /* The induction variable elimination failed; just express the original
5529      giv.  */
5530   comp = get_computation (data->current_loop, use, cand);
5531
5532   cond = *use->op_p;
5533   op_p = &TREE_OPERAND (cond, 0);
5534   if (TREE_CODE (*op_p) != SSA_NAME
5535       || zero_p (get_iv (data, *op_p)->step))
5536     op_p = &TREE_OPERAND (cond, 1);
5537
5538   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5539   if (stmts)
5540     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5541
5542   *op_p = op;
5543 }
5544
5545 /* Ensure that operand *OP_P may be used at the end of EXIT without
5546    violating loop closed ssa form.  */
5547
5548 static void
5549 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5550 {
5551   basic_block def_bb;
5552   struct loop *def_loop;
5553   tree phi, use;
5554
5555   use = USE_FROM_PTR (op_p);
5556   if (TREE_CODE (use) != SSA_NAME)
5557     return;
5558
5559   def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5560   if (!def_bb)
5561     return;
5562
5563   def_loop = def_bb->loop_father;
5564   if (flow_bb_inside_loop_p (def_loop, exit->dest))
5565     return;
5566
5567   /* Try finding a phi node that copies the value out of the loop.  */
5568   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5569     if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5570       break;
5571
5572   if (!phi)
5573     {
5574       /* Create such a phi node.  */
5575       tree new_name = duplicate_ssa_name (use, NULL);
5576
5577       phi = create_phi_node (new_name, exit->dest);
5578       SSA_NAME_DEF_STMT (new_name) = phi;
5579       add_phi_arg (phi, use, exit);
5580     }
5581
5582   SET_USE (op_p, PHI_RESULT (phi));
5583 }
5584
5585 /* Ensure that operands of STMT may be used at the end of EXIT without
5586    violating loop closed ssa form.  */
5587
5588 static void
5589 protect_loop_closed_ssa_form (edge exit, tree stmt)
5590 {
5591   ssa_op_iter iter;
5592   use_operand_p use_p;
5593
5594   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5595     protect_loop_closed_ssa_form_use (exit, use_p);
5596 }
5597
5598 /* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange things
5599    so that they are emitted on the correct place, and so that the loop closed
5600    ssa form is preserved.  */
5601
5602 static void
5603 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5604 {
5605   tree_stmt_iterator tsi;
5606   block_stmt_iterator bsi;
5607   tree phi, stmt, def, next;
5608
5609   if (!single_pred_p (exit->dest))
5610     split_loop_exit_edge (exit);
5611
5612   /* Ensure there is label in exit->dest, so that we can
5613      insert after it.  */
5614   tree_block_label (exit->dest);
5615   bsi = bsi_after_labels (exit->dest);
5616
5617   if (TREE_CODE (stmts) == STATEMENT_LIST)
5618     {
5619       for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5620         {
5621           bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5622           protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5623         }
5624     }
5625   else
5626     {
5627       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5628       protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5629     }
5630
5631   if (!op)
5632     return;
5633
5634   for (phi = phi_nodes (exit->dest); phi; phi = next)
5635     {
5636       next = PHI_CHAIN (phi);
5637
5638       if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5639         {
5640           def = PHI_RESULT (phi);
5641           remove_statement (phi, false);
5642           stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5643                         def, op);
5644           SSA_NAME_DEF_STMT (def) = stmt;
5645           bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5646         }
5647     }
5648 }
5649
5650 /* Rewrites the final value of USE (that is only needed outside of the loop)
5651    using candidate CAND.  */
5652
5653 static void
5654 rewrite_use_outer (struct ivopts_data *data,
5655                    struct iv_use *use, struct iv_cand *cand)
5656 {
5657   edge exit;
5658   tree value, op, stmts, tgt;
5659   tree phi;
5660
5661   switch (TREE_CODE (use->stmt))
5662     {
5663     case PHI_NODE:
5664       tgt = PHI_RESULT (use->stmt);
5665       break;
5666     case MODIFY_EXPR:
5667       tgt = TREE_OPERAND (use->stmt, 0);
5668       break;
5669     default:
5670       gcc_unreachable ();
5671     }
5672
5673   exit = single_dom_exit (data->current_loop);
5674
5675   if (exit)
5676     {
5677       if (!cand->iv)
5678         {
5679           struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5680           value = unshare_expr (cp->value);
5681         }
5682       else
5683         value = get_computation_at (data->current_loop,
5684                                     use, cand, last_stmt (exit->src));
5685
5686       op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5687
5688       /* If we will preserve the iv anyway and we would need to perform
5689          some computation to replace the final value, do nothing.  */
5690       if (stmts && name_info (data, tgt)->preserve_biv)
5691         return;
5692
5693       for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5694         {
5695           use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5696
5697           if (USE_FROM_PTR (use_p) == tgt)
5698             SET_USE (use_p, op);
5699         }
5700
5701       if (stmts)
5702         compute_phi_arg_on_exit (exit, stmts, op);
5703
5704       /* Enable removal of the statement.  We cannot remove it directly,
5705          since we may still need the aliasing information attached to the
5706          ssa name defined by it.  */
5707       name_info (data, tgt)->iv->have_use_for = false;
5708       return;
5709     }
5710
5711   /* If the variable is going to be preserved anyway, there is nothing to
5712      do.  */
5713   if (name_info (data, tgt)->preserve_biv)
5714     return;
5715
5716   /* Otherwise we just need to compute the iv.  */
5717   rewrite_use_nonlinear_expr (data, use, cand);
5718 }
5719
5720 /* Rewrites USE using candidate CAND.  */
5721
5722 static void
5723 rewrite_use (struct ivopts_data *data,
5724              struct iv_use *use, struct iv_cand *cand)
5725 {
5726   switch (use->type)
5727     {
5728       case USE_NONLINEAR_EXPR:
5729         rewrite_use_nonlinear_expr (data, use, cand);
5730         break;
5731
5732       case USE_OUTER:
5733         rewrite_use_outer (data, use, cand);
5734         break;
5735
5736       case USE_ADDRESS:
5737         rewrite_use_address (data, use, cand);
5738         break;
5739
5740       case USE_COMPARE:
5741         rewrite_use_compare (data, use, cand);
5742         break;
5743
5744       default:
5745         gcc_unreachable ();
5746     }
5747   update_stmt (use->stmt);
5748 }
5749
5750 /* Rewrite the uses using the selected induction variables.  */
5751
5752 static void
5753 rewrite_uses (struct ivopts_data *data)
5754 {
5755   unsigned i;
5756   struct iv_cand *cand;
5757   struct iv_use *use;
5758
5759   for (i = 0; i < n_iv_uses (data); i++)
5760     {
5761       use = iv_use (data, i);
5762       cand = use->selected;
5763       gcc_assert (cand);
5764
5765       rewrite_use (data, use, cand);
5766     }
5767 }
5768
5769 /* Removes the ivs that are not used after rewriting.  */
5770
5771 static void
5772 remove_unused_ivs (struct ivopts_data *data)
5773 {
5774   unsigned j;
5775   bitmap_iterator bi;
5776
5777   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5778     {
5779       struct version_info *info;
5780
5781       info = ver_info (data, j);
5782       if (info->iv
5783           && !zero_p (info->iv->step)
5784           && !info->inv_id
5785           && !info->iv->have_use_for
5786           && !info->preserve_biv)
5787         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5788     }
5789 }
5790
5791 /* Frees data allocated by the optimization of a single loop.  */
5792
5793 static void
5794 free_loop_data (struct ivopts_data *data)
5795 {
5796   unsigned i, j;
5797   bitmap_iterator bi;
5798   tree obj;
5799
5800   htab_empty (data->niters);
5801
5802   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5803     {
5804       struct version_info *info;
5805
5806       info = ver_info (data, i);
5807       if (info->iv)
5808         free (info->iv);
5809       info->iv = NULL;
5810       info->has_nonlin_use = false;
5811       info->preserve_biv = false;
5812       info->inv_id = 0;
5813     }
5814   bitmap_clear (data->relevant);
5815   bitmap_clear (data->important_candidates);
5816
5817   for (i = 0; i < n_iv_uses (data); i++)
5818     {
5819       struct iv_use *use = iv_use (data, i);
5820
5821       free (use->iv);
5822       BITMAP_FREE (use->related_cands);
5823       for (j = 0; j < use->n_map_members; j++)
5824         if (use->cost_map[j].depends_on)
5825           BITMAP_FREE (use->cost_map[j].depends_on);
5826       free (use->cost_map);
5827       free (use);
5828     }
5829   VEC_truncate (iv_use_p, data->iv_uses, 0);
5830
5831   for (i = 0; i < n_iv_cands (data); i++)
5832     {
5833       struct iv_cand *cand = iv_cand (data, i);
5834
5835       if (cand->iv)
5836         free (cand->iv);
5837       if (cand->depends_on)
5838         BITMAP_FREE (cand->depends_on);
5839       free (cand);
5840     }
5841   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5842
5843   if (data->version_info_size < num_ssa_names)
5844     {
5845       data->version_info_size = 2 * num_ssa_names;
5846       free (data->version_info);
5847       data->version_info = xcalloc (data->version_info_size,
5848                                     sizeof (struct version_info));
5849     }
5850
5851   data->max_inv_id = 0;
5852
5853   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5854     SET_DECL_RTL (obj, NULL_RTX);
5855
5856   VEC_truncate (tree, decl_rtl_to_reset, 0);
5857 }
5858
5859 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5860    loop tree.  */
5861
5862 static void
5863 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5864 {
5865   unsigned i;
5866
5867   for (i = 1; i < loops->num; i++)
5868     if (loops->parray[i])
5869       {
5870         free (loops->parray[i]->aux);
5871         loops->parray[i]->aux = NULL;
5872       }
5873
5874   free_loop_data (data);
5875   free (data->version_info);
5876   BITMAP_FREE (data->relevant);
5877   BITMAP_FREE (data->important_candidates);
5878   htab_delete (data->niters);
5879
5880   VEC_free (tree, heap, decl_rtl_to_reset);
5881   VEC_free (iv_use_p, heap, data->iv_uses);
5882   VEC_free (iv_cand_p, heap, data->iv_candidates);
5883 }
5884
5885 /* Optimizes the LOOP.  Returns true if anything changed.  */
5886
5887 static bool
5888 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5889 {
5890   bool changed = false;
5891   struct iv_ca *iv_ca;
5892   edge exit;
5893
5894   data->current_loop = loop;
5895
5896   if (dump_file && (dump_flags & TDF_DETAILS))
5897     {
5898       fprintf (dump_file, "Processing loop %d\n", loop->num);
5899
5900       exit = single_dom_exit (loop);
5901       if (exit)
5902         {
5903           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5904                    exit->src->index, exit->dest->index);
5905           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5906           fprintf (dump_file, "\n");
5907         }
5908
5909       fprintf (dump_file, "\n");
5910     }
5911
5912   /* For each ssa name determines whether it behaves as an induction variable
5913      in some loop.  */
5914   if (!find_induction_variables (data))
5915     goto finish;
5916
5917   /* Finds interesting uses (item 1).  */
5918   find_interesting_uses (data);
5919   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5920     goto finish;
5921
5922   /* Finds candidates for the induction variables (item 2).  */
5923   find_iv_candidates (data);
5924
5925   /* Calculates the costs (item 3, part 1).  */
5926   determine_use_iv_costs (data);
5927   determine_iv_costs (data);
5928   determine_set_costs (data);
5929
5930   /* Find the optimal set of induction variables (item 3, part 2).  */
5931   iv_ca = find_optimal_iv_set (data);
5932   if (!iv_ca)
5933     goto finish;
5934   changed = true;
5935
5936   /* Create the new induction variables (item 4, part 1).  */
5937   create_new_ivs (data, iv_ca);
5938   iv_ca_free (&iv_ca);
5939
5940   /* Rewrite the uses (item 4, part 2).  */
5941   rewrite_uses (data);
5942
5943   /* Remove the ivs that are unused after rewriting.  */
5944   remove_unused_ivs (data);
5945
5946   /* We have changed the structure of induction variables; it might happen
5947      that definitions in the scev database refer to some of them that were
5948      eliminated.  */
5949   scev_reset ();
5950
5951 finish:
5952   free_loop_data (data);
5953
5954   return changed;
5955 }
5956
5957 /* Main entry point.  Optimizes induction variables in LOOPS.  */
5958
5959 void
5960 tree_ssa_iv_optimize (struct loops *loops)
5961 {
5962   struct loop *loop;
5963   struct ivopts_data data;
5964
5965   tree_ssa_iv_optimize_init (loops, &data);
5966
5967   /* Optimize the loops starting with the innermost ones.  */
5968   loop = loops->tree_root;
5969   while (loop->inner)
5970     loop = loop->inner;
5971
5972   /* Scan the loops, inner ones first.  */
5973   while (loop != loops->tree_root)
5974     {
5975       if (dump_file && (dump_flags & TDF_DETAILS))
5976         flow_loop_dump (loop, dump_file, NULL, 1);
5977
5978       tree_ssa_iv_optimize_loop (&data, loop);
5979
5980       if (loop->next)
5981         {
5982           loop = loop->next;
5983           while (loop->inner)
5984             loop = loop->inner;
5985         }
5986       else
5987         loop = loop->outer;
5988     }
5989
5990   tree_ssa_iv_optimize_finalize (loops, &data);
5991 }