gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  19 02111-1307, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92
  93 /* The infinite cost.  */
  94 #define INFTY 10000000
  95
  96 /* The expected number of loop iterations.  TODO -- use profiling instead of
  97    this.  */
  98 #define AVG_LOOP_NITER(LOOP) 5
  99
 100
 101 /* Representation of the induction variable.  */
 102 struct iv
 103 {
 104   tree base;            /* Initial value of the iv.  */
 105   tree base_object;     /* A memory object to that the induction variable points.  */
 106   tree step;            /* Step of the iv (constant only).  */
 107   tree ssa_name;        /* The ssa name with the value.  */
 108   bool biv_p;           /* Is it a biv?  */
 109   bool have_use_for;    /* Do we already have a use for it?  */
 110   unsigned use_id;      /* The identifier in the use if it is the case.  */
 111 };
 112
 113 /* Per-ssa version information (induction variable descriptions, etc.).  */
 114 struct version_info
 115 {
 116   tree name;            /* The ssa name.  */
 117   struct iv *iv;        /* Induction variable description.  */
 118   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 119                            an expression that is not an induction variable.  */
 120   unsigned inv_id;      /* Id of an invariant.  */
 121   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 122 };
 123
 124 /* Information attached to loop.  */
 125 struct loop_data
 126 {
 127   unsigned regs_used;   /* Number of registers used.  */
 128 };
 129
 130 /* Types of uses.  */
 131 enum use_type
 132 {
 133   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 134   USE_OUTER,            /* The induction variable is used outside the loop.  */
 135   USE_ADDRESS,          /* Use in an address.  */
 136   USE_COMPARE           /* Use is a compare.  */
 137 };
 138
 139 /* The candidate - cost pair.  */
 140 struct cost_pair
 141 {
 142   struct iv_cand *cand; /* The candidate.  */
 143   unsigned cost;        /* The cost.  */
 144   bitmap depends_on;    /* The list of invariants that have to be
 145                            preserved.  */
 146 };
 147
 148 /* Use.  */
 149 struct iv_use
 150 {
 151   unsigned id;          /* The id of the use.  */
 152   enum use_type type;   /* Type of the use.  */
 153   struct iv *iv;        /* The induction variable it is based on.  */
 154   tree stmt;            /* Statement in that it occurs.  */
 155   tree *op_p;           /* The place where it occurs.  */
 156   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 157                            important ones.  */
 158
 159   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 160   struct cost_pair *cost_map;
 161                         /* The costs wrto the iv candidates.  */
 162
 163   struct iv_cand *selected;
 164                         /* The selected candidate.  */
 165 };
 166
 167 /* The position where the iv is computed.  */
 168 enum iv_position
 169 {
 170   IP_NORMAL,            /* At the end, just before the exit condition.  */
 171   IP_END,               /* At the end of the latch block.  */
 172   IP_ORIGINAL           /* The original biv.  */
 173 };
 174
 175 /* The induction variable candidate.  */
 176 struct iv_cand
 177 {
 178   unsigned id;          /* The number of the candidate.  */
 179   bool important;       /* Whether this is an "important" candidate, i.e. such
 180                            that it should be considered by all uses.  */
 181   enum iv_position pos; /* Where it is computed.  */
 182   tree incremented_at;  /* For original biv, the statement where it is
 183                            incremented.  */
 184   tree var_before;      /* The variable used for it before increment.  */
 185   tree var_after;       /* The variable used for it after increment.  */
 186   struct iv *iv;        /* The value of the candidate.  NULL for
 187                            "pseudocandidate" used to indicate the possibility
 188                            to replace the final value of an iv by direct
 189                            computation of the value.  */
 190   unsigned cost;        /* Cost of the candidate.  */
 191 };
 192
 193 /* The data used by the induction variable optimizations.  */
 194
 195 struct ivopts_data
 196 {
 197   /* The currently optimized loop.  */
 198   struct loop *current_loop;
 199
 200   /* Numbers of iterations for all exits of the current loop.  */
 201   htab_t niters;
 202
 203   /* The size of version_info array allocated.  */
 204   unsigned version_info_size;
 205
 206   /* The array of information for the ssa names.  */
 207   struct version_info *version_info;
 208
 209   /* The bitmap of indices in version_info whose value was changed.  */
 210   bitmap relevant;
 211
 212   /* The maximum invariant id.  */
 213   unsigned max_inv_id;
 214
 215   /* The uses of induction variables.  */
 216   varray_type iv_uses;
 217
 218   /* The candidates.  */
 219   varray_type iv_candidates;
 220
 221   /* A bitmap of important candidates.  */
 222   bitmap important_candidates;
 223
 224   /* Whether to consider just related and important candidates when replacing a
 225      use.  */
 226   bool consider_all_candidates;
 227 };
 228
 229 /* An assignment of iv candidates to uses.  */
 230
 231 struct iv_ca
 232 {
 233   /* The number of uses covered by the assignment.  */
 234   unsigned upto;
 235
 236   /* Number of uses that cannot be expressed by the candidates in the set.  */
 237   unsigned bad_uses;
 238
 239   /* Candidate assigned to a use, together with the related costs.  */
 240   struct cost_pair **cand_for_use;
 241
 242   /* Number of times each candidate is used.  */
 243   unsigned *n_cand_uses;
 244
 245   /* The candidates used.  */
 246   bitmap cands;
 247
 248   /* The number of candidates in the set.  */
 249   unsigned n_cands;
 250
 251   /* Total number of registers needed.  */
 252   unsigned n_regs;
 253
 254   /* Total cost of expressing uses.  */
 255   unsigned cand_use_cost;
 256
 257   /* Total cost of candidates.  */
 258   unsigned cand_cost;
 259
 260   /* Number of times each invariant is used.  */
 261   unsigned *n_invariant_uses;
 262
 263   /* Total cost of the assignment.  */
 264   unsigned cost;
 265 };
 266
 267 /* Difference of two iv candidate assignments.  */
 268
 269 struct iv_ca_delta
 270 {
 271   /* Changed use.  */
 272   struct iv_use *use;
 273
 274   /* An old assignment (for rollback purposes).  */
 275   struct cost_pair *old_cp;
 276
 277   /* A new assignment.  */
 278   struct cost_pair *new_cp;
 279
 280   /* Next change in the list.  */
 281   struct iv_ca_delta *next_change;
 282 };
 283
 284 /* Bound on number of candidates below that all candidates are considered.  */
 285
 286 #define CONSIDER_ALL_CANDIDATES_BOUND \
 287   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 288
 289 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 290    optimizing such a loop would help, and it would take ages).  */
 291
 292 #define MAX_CONSIDERED_USES \
 293   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 294
 295 /* If there are at most this number of ivs in the set, try removing unnecessary
 296    ivs from the set always.  */
 297
 298 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 299   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 300
 301 /* The list of trees for that the decl_rtl field must be reset is stored
 302    here.  */
 303
 304 static varray_type decl_rtl_to_reset;
 305
 306 /* Number of uses recorded in DATA.  */
 307
 308 static inline unsigned
 309 n_iv_uses (struct ivopts_data *data)
 310 {
 311   return VARRAY_ACTIVE_SIZE (data->iv_uses);
 312 }
 313
 314 /* Ith use recorded in DATA.  */
 315
 316 static inline struct iv_use *
 317 iv_use (struct ivopts_data *data, unsigned i)
 318 {
 319   return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
 320 }
 321
 322 /* Number of candidates recorded in DATA.  */
 323
 324 static inline unsigned
 325 n_iv_cands (struct ivopts_data *data)
 326 {
 327   return VARRAY_ACTIVE_SIZE (data->iv_candidates);
 328 }
 329
 330 /* Ith candidate recorded in DATA.  */
 331
 332 static inline struct iv_cand *
 333 iv_cand (struct ivopts_data *data, unsigned i)
 334 {
 335   return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
 336 }
 337
 338 /* The data for LOOP.  */
 339
 340 static inline struct loop_data *
 341 loop_data (struct loop *loop)
 342 {
 343   return loop->aux;
 344 }
 345
 346 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 347
 348 static edge
 349 single_dom_exit (struct loop *loop)
 350 {
 351   edge exit = loop->single_exit;
 352
 353   if (!exit)
 354     return NULL;
 355
 356   if (!just_once_each_iteration_p (loop, exit->src))
 357     return NULL;
 358
 359   return exit;
 360 }
 361
 362 /* Dumps information about the induction variable IV to FILE.  */
 363
 364 extern void dump_iv (FILE *, struct iv *);
 365 void
 366 dump_iv (FILE *file, struct iv *iv)
 367 {
 368   if (iv->ssa_name)
 369     {
 370       fprintf (file, "ssa name ");
 371       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 372       fprintf (file, "\n");
 373     }
 374
 375   fprintf (file, "  type ");
 376   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 377   fprintf (file, "\n");
 378
 379   if (iv->step)
 380     {
 381       fprintf (file, "  base ");
 382       print_generic_expr (file, iv->base, TDF_SLIM);
 383       fprintf (file, "\n");
 384
 385       fprintf (file, "  step ");
 386       print_generic_expr (file, iv->step, TDF_SLIM);
 387       fprintf (file, "\n");
 388     }
 389   else
 390     {
 391       fprintf (file, "  invariant ");
 392       print_generic_expr (file, iv->base, TDF_SLIM);
 393       fprintf (file, "\n");
 394     }
 395
 396   if (iv->base_object)
 397     {
 398       fprintf (file, "  base object ");
 399       print_generic_expr (file, iv->base_object, TDF_SLIM);
 400       fprintf (file, "\n");
 401     }
 402
 403   if (iv->biv_p)
 404     fprintf (file, "  is a biv\n");
 405 }
 406
 407 /* Dumps information about the USE to FILE.  */
 408
 409 extern void dump_use (FILE *, struct iv_use *);
 410 void
 411 dump_use (FILE *file, struct iv_use *use)
 412 {
 413   fprintf (file, "use %d\n", use->id);
 414
 415   switch (use->type)
 416     {
 417     case USE_NONLINEAR_EXPR:
 418       fprintf (file, "  generic\n");
 419       break;
 420
 421     case USE_OUTER:
 422       fprintf (file, "  outside\n");
 423       break;
 424
 425     case USE_ADDRESS:
 426       fprintf (file, "  address\n");
 427       break;
 428
 429     case USE_COMPARE:
 430       fprintf (file, "  compare\n");
 431       break;
 432
 433     default:
 434       gcc_unreachable ();
 435     }
 436
 437   fprintf (file, "  in statement ");
 438   print_generic_expr (file, use->stmt, TDF_SLIM);
 439   fprintf (file, "\n");
 440
 441   fprintf (file, "  at position ");
 442   if (use->op_p)
 443     print_generic_expr (file, *use->op_p, TDF_SLIM);
 444   fprintf (file, "\n");
 445
 446   dump_iv (file, use->iv);
 447
 448   if (use->related_cands)
 449     {
 450       fprintf (file, "  related candidates ");
 451       dump_bitmap (file, use->related_cands);
 452     }
 453 }
 454
 455 /* Dumps information about the uses to FILE.  */
 456
 457 extern void dump_uses (FILE *, struct ivopts_data *);
 458 void
 459 dump_uses (FILE *file, struct ivopts_data *data)
 460 {
 461   unsigned i;
 462   struct iv_use *use;
 463
 464   for (i = 0; i < n_iv_uses (data); i++)
 465     {
 466       use = iv_use (data, i);
 467
 468       dump_use (file, use);
 469       fprintf (file, "\n");
 470     }
 471 }
 472
 473 /* Dumps information about induction variable candidate CAND to FILE.  */
 474
 475 extern void dump_cand (FILE *, struct iv_cand *);
 476 void
 477 dump_cand (FILE *file, struct iv_cand *cand)
 478 {
 479   struct iv *iv = cand->iv;
 480
 481   fprintf (file, "candidate %d%s\n",
 482            cand->id, cand->important ? " (important)" : "");
 483
 484   if (!iv)
 485     {
 486       fprintf (file, "  final value replacement\n");
 487       return;
 488     }
 489
 490   switch (cand->pos)
 491     {
 492     case IP_NORMAL:
 493       fprintf (file, "  incremented before exit test\n");
 494       break;
 495
 496     case IP_END:
 497       fprintf (file, "  incremented at end\n");
 498       break;
 499
 500     case IP_ORIGINAL:
 501       fprintf (file, "  original biv\n");
 502       break;
 503     }
 504
 505   dump_iv (file, iv);
 506 }
 507
 508 /* Returns the info for ssa version VER.  */
 509
 510 static inline struct version_info *
 511 ver_info (struct ivopts_data *data, unsigned ver)
 512 {
 513   return data->version_info + ver;
 514 }
 515
 516 /* Returns the info for ssa name NAME.  */
 517
 518 static inline struct version_info *
 519 name_info (struct ivopts_data *data, tree name)
 520 {
 521   return ver_info (data, SSA_NAME_VERSION (name));
 522 }
 523
 524 /* Checks whether there exists number X such that X * B = A, counting modulo
 525    2^BITS.  */
 526
 527 static bool
 528 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
 529         HOST_WIDE_INT *x)
 530 {
 531   unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
 532   unsigned HOST_WIDE_INT inv, ex, val;
 533   unsigned i;
 534
 535   a &= mask;
 536   b &= mask;
 537
 538   /* First divide the whole equation by 2 as long as possible.  */
 539   while (!(a & 1) && !(b & 1))
 540     {
 541       a >>= 1;
 542       b >>= 1;
 543       bits--;
 544       mask >>= 1;
 545     }
 546
 547   if (!(b & 1))
 548     {
 549       /* If b is still even, a is odd and there is no such x.  */
 550       return false;
 551     }
 552
 553   /* Find the inverse of b.  We compute it as
 554      b^(2^(bits - 1) - 1) (mod 2^bits).  */
 555   inv = 1;
 556   ex = b;
 557   for (i = 0; i < bits - 1; i++)
 558     {
 559       inv = (inv * ex) & mask;
 560       ex = (ex * ex) & mask;
 561     }
 562
 563   val = (a * inv) & mask;
 564
 565   gcc_assert (((val * b) & mask) == a);
 566
 567   if ((val >> (bits - 1)) & 1)
 568     val |= ~mask;
 569
 570   *x = val;
 571
 572   return true;
 573 }
 574
 575 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 576    emitted in LOOP.  */
 577
 578 static bool
 579 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 580 {
 581   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 582
 583   gcc_assert (bb);
 584
 585   if (sbb == loop->latch)
 586     return true;
 587
 588   if (sbb != bb)
 589     return false;
 590
 591   return stmt == last_stmt (bb);
 592 }
 593
 594 /* Returns true if STMT if after the place where the original induction
 595    variable CAND is incremented.  */
 596
 597 static bool
 598 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 599 {
 600   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 601   basic_block stmt_bb = bb_for_stmt (stmt);
 602   block_stmt_iterator bsi;
 603
 604   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 605     return false;
 606
 607   if (stmt_bb != cand_bb)
 608     return true;
 609
 610   /* Scan the block from the end, since the original ivs are usually
 611      incremented at the end of the loop body.  */
 612   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 613     {
 614       if (bsi_stmt (bsi) == cand->incremented_at)
 615         return false;
 616       if (bsi_stmt (bsi) == stmt)
 617         return true;
 618     }
 619 }
 620
 621 /* Returns true if STMT if after the place where the induction variable
 622    CAND is incremented in LOOP.  */
 623
 624 static bool
 625 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 626 {
 627   switch (cand->pos)
 628     {
 629     case IP_END:
 630       return false;
 631
 632     case IP_NORMAL:
 633       return stmt_after_ip_normal_pos (loop, stmt);
 634
 635     case IP_ORIGINAL:
 636       return stmt_after_ip_original_pos (cand, stmt);
 637
 638     default:
 639       gcc_unreachable ();
 640     }
 641 }
 642
 643 /* Element of the table in that we cache the numbers of iterations obtained
 644    from exits of the loop.  */
 645
 646 struct nfe_cache_elt
 647 {
 648   /* The edge for that the number of iterations is cached.  */
 649   edge exit;
 650
 651   /* True if the # of iterations was successfully determined.  */
 652   bool valid_p;
 653
 654   /* Description of # of iterations.  */
 655   struct tree_niter_desc niter;
 656 };
 657
 658 /* Hash function for nfe_cache_elt E.  */
 659
 660 static hashval_t
 661 nfe_hash (const void *e)
 662 {
 663   const struct nfe_cache_elt *elt = e;
 664
 665   return htab_hash_pointer (elt->exit);
 666 }
 667
 668 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 669
 670 static int
 671 nfe_eq (const void *e1, const void *e2)
 672 {
 673   const struct nfe_cache_elt *elt1 = e1;
 674
 675   return elt1->exit == e2;
 676 }
 677
 678 /*  Returns structure describing number of iterations determined from
 679     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 680
 681 static struct tree_niter_desc *
 682 niter_for_exit (struct ivopts_data *data, edge exit)
 683 {
 684   struct nfe_cache_elt *nfe_desc;
 685   PTR *slot;
 686
 687   slot = htab_find_slot_with_hash (data->niters, exit,
 688                                    htab_hash_pointer (exit),
 689                                    INSERT);
 690
 691   if (!*slot)
 692     {
 693       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 694       nfe_desc->exit = exit;
 695       nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
 696                                                      exit, &nfe_desc->niter);
 697       *slot = nfe_desc;
 698     }
 699   else
 700     nfe_desc = *slot;
 701
 702   if (!nfe_desc->valid_p)
 703     return NULL;
 704
 705   return &nfe_desc->niter;
 706 }
 707
 708 /* Returns structure describing number of iterations determined from
 709    single dominating exit of DATA->current_loop, or NULL if something
 710    goes wrong.  */
 711
 712 static struct tree_niter_desc *
 713 niter_for_single_dom_exit (struct ivopts_data *data)
 714 {
 715   edge exit = single_dom_exit (data->current_loop);
 716
 717   if (!exit)
 718     return NULL;
 719
 720   return niter_for_exit (data, exit);
 721 }
 722
 723 /* Initializes data structures used by the iv optimization pass, stored
 724    in DATA.  LOOPS is the loop tree.  */
 725
 726 static void
 727 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
 728 {
 729   unsigned i;
 730
 731   data->version_info_size = 2 * num_ssa_names;
 732   data->version_info = xcalloc (data->version_info_size,
 733                                 sizeof (struct version_info));
 734   data->relevant = BITMAP_ALLOC (NULL);
 735   data->important_candidates = BITMAP_ALLOC (NULL);
 736   data->max_inv_id = 0;
 737   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 738
 739   for (i = 1; i < loops->num; i++)
 740     if (loops->parray[i])
 741       loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
 742
 743   VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
 744   VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
 745   VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
 746 }
 747
 748 /* Returns a memory object to that EXPR points.  In case we are able to
 749    determine that it does not point to any such object, NULL is returned.  */
 750
 751 static tree
 752 determine_base_object (tree expr)
 753 {
 754   enum tree_code code = TREE_CODE (expr);
 755   tree base, obj, op0, op1;
 756
 757   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 758     return NULL_TREE;
 759
 760   switch (code)
 761     {
 762     case INTEGER_CST:
 763       return NULL_TREE;
 764
 765     case ADDR_EXPR:
 766       obj = TREE_OPERAND (expr, 0);
 767       base = get_base_address (obj);
 768
 769       if (!base)
 770         return expr;
 771
 772       if (TREE_CODE (base) == INDIRECT_REF)
 773         return determine_base_object (TREE_OPERAND (base, 0));
 774
 775       return fold (build1 (ADDR_EXPR, ptr_type_node, base));
 776
 777     case PLUS_EXPR:
 778     case MINUS_EXPR:
 779       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 780       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 781
 782       if (!op1)
 783         return op0;
 784
 785       if (!op0)
 786         return (code == PLUS_EXPR
 787                 ? op1
 788                 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
 789
 790       return fold (build (code, ptr_type_node, op0, op1));
 791
 792     case NOP_EXPR:
 793     case CONVERT_EXPR:
 794       return determine_base_object (TREE_OPERAND (expr, 0));
 795
 796     default:
 797       return fold_convert (ptr_type_node, expr);
 798     }
 799 }
 800
 801 /* Allocates an induction variable with given initial value BASE and step STEP
 802    for loop LOOP.  */
 803
 804 static struct iv *
 805 alloc_iv (tree base, tree step)
 806 {
 807   struct iv *iv = xcalloc (1, sizeof (struct iv));
 808
 809   if (step && integer_zerop (step))
 810     step = NULL_TREE;
 811
 812   iv->base = base;
 813   iv->base_object = determine_base_object (base);
 814   iv->step = step;
 815   iv->biv_p = false;
 816   iv->have_use_for = false;
 817   iv->use_id = 0;
 818   iv->ssa_name = NULL_TREE;
 819
 820   return iv;
 821 }
 822
 823 /* Sets STEP and BASE for induction variable IV.  */
 824
 825 static void
 826 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 827 {
 828   struct version_info *info = name_info (data, iv);
 829
 830   gcc_assert (!info->iv);
 831
 832   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 833   info->iv = alloc_iv (base, step);
 834   info->iv->ssa_name = iv;
 835 }
 836
 837 /* Finds induction variable declaration for VAR.  */
 838
 839 static struct iv *
 840 get_iv (struct ivopts_data *data, tree var)
 841 {
 842   basic_block bb;
 843
 844   if (!name_info (data, var)->iv)
 845     {
 846       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 847
 848       if (!bb
 849           || !flow_bb_inside_loop_p (data->current_loop, bb))
 850         set_iv (data, var, var, NULL_TREE);
 851     }
 852
 853   return name_info (data, var)->iv;
 854 }
 855
 856 /* Determines the step of a biv defined in PHI.  */
 857
 858 static tree
 859 determine_biv_step (tree phi)
 860 {
 861   struct loop *loop = bb_for_stmt (phi)->loop_father;
 862   tree name = PHI_RESULT (phi), base, step;
 863   tree type = TREE_TYPE (name);
 864
 865   if (!is_gimple_reg (name))
 866     return NULL_TREE;
 867
 868   if (!simple_iv (loop, phi, name, &base, &step))
 869     return NULL_TREE;
 870
 871   if (!step)
 872     return build_int_cst (type, 0);
 873
 874   return step;
 875 }
 876
 877 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 878
 879 static bool
 880 abnormal_ssa_name_p (tree exp)
 881 {
 882   if (!exp)
 883     return false;
 884
 885   if (TREE_CODE (exp) != SSA_NAME)
 886     return false;
 887
 888   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 889 }
 890
 891 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 892    abnormal phi node.  Callback for for_each_index.  */
 893
 894 static bool
 895 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 896                                   void *data ATTRIBUTE_UNUSED)
 897 {
 898   if (TREE_CODE (base) == ARRAY_REF)
 899     {
 900       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 901         return false;
 902       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 903         return false;
 904     }
 905
 906   return !abnormal_ssa_name_p (*index);
 907 }
 908
 909 /* Returns true if EXPR contains a ssa name that occurs in an
 910    abnormal phi node.  */
 911
 912 static bool
 913 contains_abnormal_ssa_name_p (tree expr)
 914 {
 915   enum tree_code code = TREE_CODE (expr);
 916   enum tree_code_class class = TREE_CODE_CLASS (code);
 917
 918   if (code == SSA_NAME)
 919     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 920
 921   if (code == INTEGER_CST
 922       || is_gimple_min_invariant (expr))
 923     return false;
 924
 925   if (code == ADDR_EXPR)
 926     return !for_each_index (&TREE_OPERAND (expr, 0),
 927                             idx_contains_abnormal_ssa_name_p,
 928                             NULL);
 929
 930   switch (class)
 931     {
 932     case tcc_binary:
 933     case tcc_comparison:
 934       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 935         return true;
 936
 937       /* Fallthru.  */
 938     case tcc_unary:
 939       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 940         return true;
 941
 942       break;
 943
 944     default:
 945       gcc_unreachable ();
 946     }
 947
 948   return false;
 949 }
 950
 951 /* Finds basic ivs.  */
 952
 953 static bool
 954 find_bivs (struct ivopts_data *data)
 955 {
 956   tree phi, step, type, base;
 957   bool found = false;
 958   struct loop *loop = data->current_loop;
 959
 960   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 961     {
 962       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 963         continue;
 964
 965       step = determine_biv_step (phi);
 966
 967       if (!step)
 968         continue;
 969       if (cst_and_fits_in_hwi (step)
 970           && int_cst_value (step) == 0)
 971         continue;
 972
 973       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 974       if (contains_abnormal_ssa_name_p (base))
 975         continue;
 976
 977       type = TREE_TYPE (PHI_RESULT (phi));
 978       base = fold_convert (type, base);
 979       step = fold_convert (type, step);
 980
 981       /* FIXME: We do not handle induction variables whose step does
 982          not satisfy cst_and_fits_in_hwi.  */
 983       if (!cst_and_fits_in_hwi (step))
 984         continue;
 985
 986       set_iv (data, PHI_RESULT (phi), base, step);
 987       found = true;
 988     }
 989
 990   return found;
 991 }
 992
 993 /* Marks basic ivs.  */
 994
 995 static void
 996 mark_bivs (struct ivopts_data *data)
 997 {
 998   tree phi, var;
 999   struct iv *iv, *incr_iv;
1000   struct loop *loop = data->current_loop;
1001   basic_block incr_bb;
1002
1003   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1004     {
1005       iv = get_iv (data, PHI_RESULT (phi));
1006       if (!iv)
1007         continue;
1008
1009       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1010       incr_iv = get_iv (data, var);
1011       if (!incr_iv)
1012         continue;
1013
1014       /* If the increment is in the subloop, ignore it.  */
1015       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1016       if (incr_bb->loop_father != data->current_loop
1017           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1018         continue;
1019
1020       iv->biv_p = true;
1021       incr_iv->biv_p = true;
1022     }
1023 }
1024
1025 /* Checks whether STMT defines a linear induction variable and stores its
1026    parameters to BASE and STEP.  */
1027
1028 static bool
1029 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1030                         tree *base, tree *step)
1031 {
1032   tree lhs;
1033   struct loop *loop = data->current_loop;
1034
1035   *base = NULL_TREE;
1036   *step = NULL_TREE;
1037
1038   if (TREE_CODE (stmt) != MODIFY_EXPR)
1039     return false;
1040
1041   lhs = TREE_OPERAND (stmt, 0);
1042   if (TREE_CODE (lhs) != SSA_NAME)
1043     return false;
1044
1045   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
1046     return false;
1047
1048   /* FIXME: We do not handle induction variables whose step does
1049      not satisfy cst_and_fits_in_hwi.  */
1050   if (!zero_p (*step)
1051       && !cst_and_fits_in_hwi (*step))
1052     return false;
1053
1054   if (contains_abnormal_ssa_name_p (*base))
1055     return false;
1056
1057   return true;
1058 }
1059
1060 /* Finds general ivs in statement STMT.  */
1061
1062 static void
1063 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1064 {
1065   tree base, step;
1066
1067   if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1068     return;
1069
1070   set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1071 }
1072
1073 /* Finds general ivs in basic block BB.  */
1074
1075 static void
1076 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1077 {
1078   block_stmt_iterator bsi;
1079
1080   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1081     find_givs_in_stmt (data, bsi_stmt (bsi));
1082 }
1083
1084 /* Finds general ivs.  */
1085
1086 static void
1087 find_givs (struct ivopts_data *data)
1088 {
1089   struct loop *loop = data->current_loop;
1090   basic_block *body = get_loop_body_in_dom_order (loop);
1091   unsigned i;
1092
1093   for (i = 0; i < loop->num_nodes; i++)
1094     find_givs_in_bb (data, body[i]);
1095   free (body);
1096 }
1097
1098 /* For each ssa name defined in LOOP determines whether it is an induction
1099    variable and if so, its initial value and step.  */
1100
1101 static bool
1102 find_induction_variables (struct ivopts_data *data)
1103 {
1104   unsigned i;
1105   bitmap_iterator bi;
1106
1107   if (!find_bivs (data))
1108     return false;
1109
1110   find_givs (data);
1111   mark_bivs (data);
1112
1113   if (dump_file && (dump_flags & TDF_DETAILS))
1114     {
1115       struct tree_niter_desc *niter;
1116
1117       niter = niter_for_single_dom_exit (data);
1118
1119       if (niter)
1120         {
1121           fprintf (dump_file, "  number of iterations ");
1122           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1123           fprintf (dump_file, "\n");
1124
1125           fprintf (dump_file, "  may be zero if ");
1126           print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1127           fprintf (dump_file, "\n");
1128           fprintf (dump_file, "\n");
1129         };
1130
1131       fprintf (dump_file, "Induction variables:\n\n");
1132
1133       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1134         {
1135           if (ver_info (data, i)->iv)
1136             dump_iv (dump_file, ver_info (data, i)->iv);
1137         }
1138     }
1139
1140   return true;
1141 }
1142
1143 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1144
1145 static struct iv_use *
1146 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1147             tree stmt, enum use_type use_type)
1148 {
1149   struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1150
1151   use->id = n_iv_uses (data);
1152   use->type = use_type;
1153   use->iv = iv;
1154   use->stmt = stmt;
1155   use->op_p = use_p;
1156   use->related_cands = BITMAP_ALLOC (NULL);
1157
1158   /* To avoid showing ssa name in the dumps, if it was not reset by the
1159      caller.  */
1160   iv->ssa_name = NULL_TREE;
1161
1162   if (dump_file && (dump_flags & TDF_DETAILS))
1163     dump_use (dump_file, use);
1164
1165   VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
1166
1167   return use;
1168 }
1169
1170 /* Checks whether OP is a loop-level invariant and if so, records it.
1171    NONLINEAR_USE is true if the invariant is used in a way we do not
1172    handle specially.  */
1173
1174 static void
1175 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1176 {
1177   basic_block bb;
1178   struct version_info *info;
1179
1180   if (TREE_CODE (op) != SSA_NAME
1181       || !is_gimple_reg (op))
1182     return;
1183
1184   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1185   if (bb
1186       && flow_bb_inside_loop_p (data->current_loop, bb))
1187     return;
1188
1189   info = name_info (data, op);
1190   info->name = op;
1191   info->has_nonlin_use |= nonlinear_use;
1192   if (!info->inv_id)
1193     info->inv_id = ++data->max_inv_id;
1194   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1195 }
1196
1197 /* Checks whether the use OP is interesting and if so, records it
1198    as TYPE.  */
1199
1200 static struct iv_use *
1201 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1202                                        enum use_type type)
1203 {
1204   struct iv *iv;
1205   struct iv *civ;
1206   tree stmt;
1207   struct iv_use *use;
1208
1209   if (TREE_CODE (op) != SSA_NAME)
1210     return NULL;
1211
1212   iv = get_iv (data, op);
1213   if (!iv)
1214     return NULL;
1215
1216   if (iv->have_use_for)
1217     {
1218       use = iv_use (data, iv->use_id);
1219
1220       gcc_assert (use->type == USE_NONLINEAR_EXPR
1221                   || use->type == USE_OUTER);
1222
1223       if (type == USE_NONLINEAR_EXPR)
1224         use->type = USE_NONLINEAR_EXPR;
1225       return use;
1226     }
1227
1228   if (zero_p (iv->step))
1229     {
1230       record_invariant (data, op, true);
1231       return NULL;
1232     }
1233   iv->have_use_for = true;
1234
1235   civ = xmalloc (sizeof (struct iv));
1236   *civ = *iv;
1237
1238   stmt = SSA_NAME_DEF_STMT (op);
1239   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1240               || TREE_CODE (stmt) == MODIFY_EXPR);
1241
1242   use = record_use (data, NULL, civ, stmt, type);
1243   iv->use_id = use->id;
1244
1245   return use;
1246 }
1247
1248 /* Checks whether the use OP is interesting and if so, records it.  */
1249
1250 static struct iv_use *
1251 find_interesting_uses_op (struct ivopts_data *data, tree op)
1252 {
1253   return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1254 }
1255
1256 /* Records a definition of induction variable OP that is used outside of the
1257    loop.  */
1258
1259 static struct iv_use *
1260 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1261 {
1262   return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1263 }
1264
1265 /* Checks whether the condition *COND_P in STMT is interesting
1266    and if so, records it.  */
1267
1268 static void
1269 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1270 {
1271   tree *op0_p;
1272   tree *op1_p;
1273   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1274   struct iv const_iv;
1275   tree zero = integer_zero_node;
1276
1277   const_iv.step = NULL_TREE;
1278
1279   if (integer_zerop (*cond_p)
1280       || integer_nonzerop (*cond_p))
1281     return;
1282
1283   if (TREE_CODE (*cond_p) == SSA_NAME)
1284     {
1285       op0_p = cond_p;
1286       op1_p = &zero;
1287     }
1288   else
1289     {
1290       op0_p = &TREE_OPERAND (*cond_p, 0);
1291       op1_p = &TREE_OPERAND (*cond_p, 1);
1292     }
1293
1294   if (TREE_CODE (*op0_p) == SSA_NAME)
1295     iv0 = get_iv (data, *op0_p);
1296   else
1297     iv0 = &const_iv;
1298
1299   if (TREE_CODE (*op1_p) == SSA_NAME)
1300     iv1 = get_iv (data, *op1_p);
1301   else
1302     iv1 = &const_iv;
1303
1304   if (/* When comparing with non-invariant value, we may not do any senseful
1305          induction variable elimination.  */
1306       (!iv0 || !iv1)
1307       /* Eliminating condition based on two ivs would be nontrivial.
1308          ??? TODO -- it is not really important to handle this case.  */
1309       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1310     {
1311       find_interesting_uses_op (data, *op0_p);
1312       find_interesting_uses_op (data, *op1_p);
1313       return;
1314     }
1315
1316   if (zero_p (iv0->step) && zero_p (iv1->step))
1317     {
1318       /* If both are invariants, this is a work for unswitching.  */
1319       return;
1320     }
1321
1322   civ = xmalloc (sizeof (struct iv));
1323   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1324   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1325 }
1326
1327 /* Returns true if expression EXPR is obviously invariant in LOOP,
1328    i.e. if all its operands are defined outside of the LOOP.  */
1329
1330 bool
1331 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1332 {
1333   basic_block def_bb;
1334   unsigned i, len;
1335
1336   if (is_gimple_min_invariant (expr))
1337     return true;
1338
1339   if (TREE_CODE (expr) == SSA_NAME)
1340     {
1341       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1342       if (def_bb
1343           && flow_bb_inside_loop_p (loop, def_bb))
1344         return false;
1345
1346       return true;
1347     }
1348
1349   if (!EXPR_P (expr))
1350     return false;
1351
1352   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1353   for (i = 0; i < len; i++)
1354     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1355       return false;
1356
1357   return true;
1358 }
1359
1360 /* Cumulates the steps of indices into DATA and replaces their values with the
1361    initial ones.  Returns false when the value of the index cannot be determined.
1362    Callback for for_each_index.  */
1363
1364 struct ifs_ivopts_data
1365 {
1366   struct ivopts_data *ivopts_data;
1367   tree stmt;
1368   tree *step_p;
1369 };
1370
1371 static bool
1372 idx_find_step (tree base, tree *idx, void *data)
1373 {
1374   struct ifs_ivopts_data *dta = data;
1375   struct iv *iv;
1376   tree step, type, iv_type, iv_step, lbound, off;
1377   struct loop *loop = dta->ivopts_data->current_loop;
1378
1379   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1380       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1381     return false;
1382
1383   /* If base is a component ref, require that the offset of the reference
1384      be invariant.  */
1385   if (TREE_CODE (base) == COMPONENT_REF)
1386     {
1387       off = component_ref_field_offset (base);
1388       return expr_invariant_in_loop_p (loop, off);
1389     }
1390
1391   /* If base is array, first check whether we will be able to move the
1392      reference out of the loop (in order to take its address in strength
1393      reduction).  In order for this to work we need both lower bound
1394      and step to be loop invariants.  */
1395   if (TREE_CODE (base) == ARRAY_REF)
1396     {
1397       step = array_ref_element_size (base);
1398       lbound = array_ref_low_bound (base);
1399
1400       if (!expr_invariant_in_loop_p (loop, step)
1401           || !expr_invariant_in_loop_p (loop, lbound))
1402         return false;
1403     }
1404
1405   if (TREE_CODE (*idx) != SSA_NAME)
1406     return true;
1407
1408   iv = get_iv (dta->ivopts_data, *idx);
1409   if (!iv)
1410     return false;
1411
1412   *idx = iv->base;
1413
1414   if (!iv->step)
1415     return true;
1416
1417   iv_type = TREE_TYPE (iv->base);
1418   type = build_pointer_type (TREE_TYPE (base));
1419   if (TREE_CODE (base) == ARRAY_REF)
1420     {
1421       step = array_ref_element_size (base);
1422
1423       /* We only handle addresses whose step is an integer constant.  */
1424       if (TREE_CODE (step) != INTEGER_CST)
1425         return false;
1426     }
1427   else
1428     /* The step for pointer arithmetics already is 1 byte.  */
1429     step = build_int_cst (type, 1);
1430
1431   if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1432     iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1433                                           type, iv->base, iv->step, dta->stmt);
1434   else
1435     iv_step = fold_convert (iv_type, iv->step);
1436
1437   if (!iv_step)
1438     {
1439       /* The index might wrap.  */
1440       return false;
1441     }
1442
1443   step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);
1444
1445   if (!*dta->step_p)
1446     *dta->step_p = step;
1447   else
1448     *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
1449                                             *dta->step_p, step);
1450
1451   return true;
1452 }
1453
1454 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1455    object is passed to it in DATA.  */
1456
1457 static bool
1458 idx_record_use (tree base, tree *idx,
1459                 void *data)
1460 {
1461   find_interesting_uses_op (data, *idx);
1462   if (TREE_CODE (base) == ARRAY_REF)
1463     {
1464       find_interesting_uses_op (data, array_ref_element_size (base));
1465       find_interesting_uses_op (data, array_ref_low_bound (base));
1466     }
1467   return true;
1468 }
1469
1470 /* Returns true if memory reference REF may be unaligned.  */
1471
1472 static bool
1473 may_be_unaligned_p (tree ref)
1474 {
1475   tree base;
1476   tree base_type;
1477   HOST_WIDE_INT bitsize;
1478   HOST_WIDE_INT bitpos;
1479   tree toffset;
1480   enum machine_mode mode;
1481   int unsignedp, volatilep;
1482   unsigned base_align;
1483
1484   /* The test below is basically copy of what expr.c:normal_inner_ref
1485      does to check whether the object must be loaded by parts when
1486      STRICT_ALIGNMENT is true.  */
1487   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1488                               &unsignedp, &volatilep, true);
1489   base_type = TREE_TYPE (base);
1490   base_align = TYPE_ALIGN (base_type);
1491
1492   if (mode != BLKmode
1493       && (base_align < GET_MODE_ALIGNMENT (mode)
1494           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1495           || bitpos % BITS_PER_UNIT != 0))
1496     return true;
1497
1498   return false;
1499 }
1500
1501 /* Finds addresses in *OP_P inside STMT.  */
1502
1503 static void
1504 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1505 {
1506   tree base = unshare_expr (*op_p), step = NULL;
1507   struct iv *civ;
1508   struct ifs_ivopts_data ifs_ivopts_data;
1509
1510   /* Do not play with volatile memory references.  A bit too conservative,
1511      perhaps, but safe.  */
1512   if (stmt_ann (stmt)->has_volatile_ops)
1513     goto fail;
1514
1515   /* Ignore bitfields for now.  Not really something terribly complicated
1516      to handle.  TODO.  */
1517   if (TREE_CODE (base) == COMPONENT_REF
1518       && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1519     goto fail;
1520
1521   if (STRICT_ALIGNMENT
1522       && may_be_unaligned_p (base))
1523     goto fail;
1524
1525   ifs_ivopts_data.ivopts_data = data;
1526   ifs_ivopts_data.stmt = stmt;
1527   ifs_ivopts_data.step_p = &step;
1528   if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1529       || zero_p (step))
1530     goto fail;
1531
1532   gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1533   gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1534
1535   if (TREE_CODE (base) == INDIRECT_REF)
1536     base = TREE_OPERAND (base, 0);
1537   else
1538     base = build_addr (base);
1539
1540   civ = alloc_iv (base, step);
1541   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1542   return;
1543
1544 fail:
1545   for_each_index (op_p, idx_record_use, data);
1546 }
1547
1548 /* Finds and records invariants used in STMT.  */
1549
1550 static void
1551 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1552 {
1553   use_optype uses = NULL;
1554   unsigned i, n;
1555   tree op;
1556
1557   if (TREE_CODE (stmt) == PHI_NODE)
1558     n = PHI_NUM_ARGS (stmt);
1559   else
1560     {
1561       get_stmt_operands (stmt);
1562       uses = STMT_USE_OPS (stmt);
1563       n = NUM_USES (uses);
1564     }
1565
1566   for (i = 0; i < n; i++)
1567     {
1568       if (TREE_CODE (stmt) == PHI_NODE)
1569         op = PHI_ARG_DEF (stmt, i);
1570       else
1571         op = USE_OP (uses, i);
1572
1573       record_invariant (data, op, false);
1574     }
1575 }
1576
1577 /* Finds interesting uses of induction variables in the statement STMT.  */
1578
1579 static void
1580 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1581 {
1582   struct iv *iv;
1583   tree op, lhs, rhs;
1584   use_optype uses = NULL;
1585   unsigned i, n;
1586
1587   find_invariants_stmt (data, stmt);
1588
1589   if (TREE_CODE (stmt) == COND_EXPR)
1590     {
1591       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1592       return;
1593     }
1594
1595   if (TREE_CODE (stmt) == MODIFY_EXPR)
1596     {
1597       lhs = TREE_OPERAND (stmt, 0);
1598       rhs = TREE_OPERAND (stmt, 1);
1599
1600       if (TREE_CODE (lhs) == SSA_NAME)
1601         {
1602           /* If the statement defines an induction variable, the uses are not
1603              interesting by themselves.  */
1604
1605           iv = get_iv (data, lhs);
1606
1607           if (iv && !zero_p (iv->step))
1608             return;
1609         }
1610
1611       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1612         {
1613         case tcc_comparison:
1614           find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1615           return;
1616
1617         case tcc_reference:
1618           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1619           if (REFERENCE_CLASS_P (lhs))
1620             find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1621           return;
1622
1623         default: ;
1624         }
1625
1626       if (REFERENCE_CLASS_P (lhs)
1627           && is_gimple_val (rhs))
1628         {
1629           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1630           find_interesting_uses_op (data, rhs);
1631           return;
1632         }
1633
1634       /* TODO -- we should also handle address uses of type
1635
1636          memory = call (whatever);
1637
1638          and
1639
1640          call (memory).  */
1641     }
1642
1643   if (TREE_CODE (stmt) == PHI_NODE
1644       && bb_for_stmt (stmt) == data->current_loop->header)
1645     {
1646       lhs = PHI_RESULT (stmt);
1647       iv = get_iv (data, lhs);
1648
1649       if (iv && !zero_p (iv->step))
1650         return;
1651     }
1652
1653   if (TREE_CODE (stmt) == PHI_NODE)
1654     n = PHI_NUM_ARGS (stmt);
1655   else
1656     {
1657       uses = STMT_USE_OPS (stmt);
1658       n = NUM_USES (uses);
1659     }
1660
1661   for (i = 0; i < n; i++)
1662     {
1663       if (TREE_CODE (stmt) == PHI_NODE)
1664         op = PHI_ARG_DEF (stmt, i);
1665       else
1666         op = USE_OP (uses, i);
1667
1668       if (TREE_CODE (op) != SSA_NAME)
1669         continue;
1670
1671       iv = get_iv (data, op);
1672       if (!iv)
1673         continue;
1674
1675       find_interesting_uses_op (data, op);
1676     }
1677 }
1678
1679 /* Finds interesting uses of induction variables outside of loops
1680    on loop exit edge EXIT.  */
1681
1682 static void
1683 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1684 {
1685   tree phi, def;
1686
1687   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1688     {
1689       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1690       find_interesting_uses_outer (data, def);
1691     }
1692 }
1693
1694 /* Finds uses of the induction variables that are interesting.  */
1695
1696 static void
1697 find_interesting_uses (struct ivopts_data *data)
1698 {
1699   basic_block bb;
1700   block_stmt_iterator bsi;
1701   tree phi;
1702   basic_block *body = get_loop_body (data->current_loop);
1703   unsigned i;
1704   struct version_info *info;
1705   edge e;
1706
1707   if (dump_file && (dump_flags & TDF_DETAILS))
1708     fprintf (dump_file, "Uses:\n\n");
1709
1710   for (i = 0; i < data->current_loop->num_nodes; i++)
1711     {
1712       edge_iterator ei;
1713       bb = body[i];
1714
1715       FOR_EACH_EDGE (e, ei, bb->succs)
1716         if (e->dest != EXIT_BLOCK_PTR
1717             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1718           find_interesting_uses_outside (data, e);
1719
1720       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1721         find_interesting_uses_stmt (data, phi);
1722       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1723         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1724     }
1725
1726   if (dump_file && (dump_flags & TDF_DETAILS))
1727     {
1728       bitmap_iterator bi;
1729
1730       fprintf (dump_file, "\n");
1731
1732       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1733         {
1734           info = ver_info (data, i);
1735           if (info->inv_id)
1736             {
1737               fprintf (dump_file, "  ");
1738               print_generic_expr (dump_file, info->name, TDF_SLIM);
1739               fprintf (dump_file, " is invariant (%d)%s\n",
1740                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1741             }
1742         }
1743
1744       fprintf (dump_file, "\n");
1745     }
1746
1747   free (body);
1748 }
1749
1750 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1751    is true, assume we are inside an address.  */
1752
1753 static tree
1754 strip_offset (tree expr, bool inside_addr, unsigned HOST_WIDE_INT *offset)
1755 {
1756   tree op0 = NULL_TREE, op1 = NULL_TREE, step;
1757   enum tree_code code;
1758   tree type, orig_type = TREE_TYPE (expr);
1759   unsigned HOST_WIDE_INT off0, off1, st;
1760   tree orig_expr = expr;
1761
1762   STRIP_NOPS (expr);
1763   type = TREE_TYPE (expr);
1764   code = TREE_CODE (expr);
1765   *offset = 0;
1766
1767   switch (code)
1768     {
1769     case INTEGER_CST:
1770       if (!cst_and_fits_in_hwi (expr)
1771           || zero_p (expr))
1772         return orig_expr;
1773
1774       *offset = int_cst_value (expr);
1775       return build_int_cst_type (orig_type, 0);
1776
1777     case PLUS_EXPR:
1778     case MINUS_EXPR:
1779       op0 = TREE_OPERAND (expr, 0);
1780       op1 = TREE_OPERAND (expr, 1);
1781
1782       op0 = strip_offset (op0, false, &off0);
1783       op1 = strip_offset (op1, false, &off1);
1784
1785       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1786       if (op0 == TREE_OPERAND (expr, 0)
1787           && op1 == TREE_OPERAND (expr, 1))
1788         return orig_expr;
1789
1790       if (zero_p (op1))
1791         expr = op0;
1792       else if (zero_p (op0))
1793         {
1794           if (code == PLUS_EXPR)
1795             expr = op1;
1796           else
1797             expr = build1 (NEGATE_EXPR, type, op1);
1798         }
1799       else
1800         expr = build2 (code, type, op0, op1);
1801
1802       return fold_convert (orig_type, expr);
1803
1804     case ARRAY_REF:
1805       if (!inside_addr)
1806         return orig_expr;
1807
1808       step = array_ref_element_size (expr);
1809       if (!cst_and_fits_in_hwi (step))
1810         break;
1811
1812       st = int_cst_value (step);
1813       op1 = TREE_OPERAND (expr, 1);
1814       op1 = strip_offset (op1, false, &off1);
1815       *offset = off1 * st;
1816       break;
1817
1818     case COMPONENT_REF:
1819       if (!inside_addr)
1820         return orig_expr;
1821       break;
1822
1823     case ADDR_EXPR:
1824       inside_addr = true;
1825       break;
1826
1827     default:
1828       return orig_expr;
1829     }
1830
1831   /* Default handling of expressions for that we want to recurse into
1832      the first operand.  */
1833   op0 = TREE_OPERAND (expr, 0);
1834   op0 = strip_offset (op0, inside_addr, &off0);
1835   *offset += off0;
1836
1837   if (op0 == TREE_OPERAND (expr, 0)
1838       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1839     return orig_expr;
1840
1841   expr = copy_node (expr);
1842   TREE_OPERAND (expr, 0) = op0;
1843   if (op1)
1844     TREE_OPERAND (expr, 1) = op1;
1845
1846   return fold_convert (orig_type, expr);
1847 }
1848
1849 /* Returns variant of TYPE that can be used as base for different uses.
1850    For integer types, we return unsigned variant of the type, which
1851    avoids problems with overflows.  For pointer types, we return void *.  */
1852
1853 static tree
1854 generic_type_for (tree type)
1855 {
1856   if (POINTER_TYPE_P (type))
1857     return ptr_type_node;
1858
1859   if (TYPE_UNSIGNED (type))
1860     return type;
1861
1862   return unsigned_type_for (type);
1863 }
1864
1865 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1866    position to POS.  If USE is not NULL, the candidate is set as related to
1867    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1868    replacement of the final value of the iv by a direct computation.  */
1869
1870 static struct iv_cand *
1871 add_candidate_1 (struct ivopts_data *data,
1872                  tree base, tree step, bool important, enum iv_position pos,
1873                  struct iv_use *use, tree incremented_at)
1874 {
1875   unsigned i;
1876   struct iv_cand *cand = NULL;
1877   tree type, orig_type;
1878
1879   if (base)
1880     {
1881       orig_type = TREE_TYPE (base);
1882       type = generic_type_for (orig_type);
1883       if (type != orig_type)
1884         {
1885           base = fold_convert (type, base);
1886           if (step)
1887             step = fold_convert (type, step);
1888         }
1889     }
1890
1891   for (i = 0; i < n_iv_cands (data); i++)
1892     {
1893       cand = iv_cand (data, i);
1894
1895       if (cand->pos != pos)
1896         continue;
1897
1898       if (cand->incremented_at != incremented_at)
1899         continue;
1900
1901       if (!cand->iv)
1902         {
1903           if (!base && !step)
1904             break;
1905
1906           continue;
1907         }
1908
1909       if (!base && !step)
1910         continue;
1911
1912       if (!operand_equal_p (base, cand->iv->base, 0))
1913         continue;
1914
1915       if (zero_p (cand->iv->step))
1916         {
1917           if (zero_p (step))
1918             break;
1919         }
1920       else
1921         {
1922           if (step && operand_equal_p (step, cand->iv->step, 0))
1923             break;
1924         }
1925     }
1926
1927   if (i == n_iv_cands (data))
1928     {
1929       cand = xcalloc (1, sizeof (struct iv_cand));
1930       cand->id = i;
1931
1932       if (!base && !step)
1933         cand->iv = NULL;
1934       else
1935         cand->iv = alloc_iv (base, step);
1936
1937       cand->pos = pos;
1938       if (pos != IP_ORIGINAL && cand->iv)
1939         {
1940           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1941           cand->var_after = cand->var_before;
1942         }
1943       cand->important = important;
1944       cand->incremented_at = incremented_at;
1945       VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1946
1947       if (dump_file && (dump_flags & TDF_DETAILS))
1948         dump_cand (dump_file, cand);
1949     }
1950
1951   if (important && !cand->important)
1952     {
1953       cand->important = true;
1954       if (dump_file && (dump_flags & TDF_DETAILS))
1955         fprintf (dump_file, "Candidate %d is important\n", cand->id);
1956     }
1957
1958   if (use)
1959     {
1960       bitmap_set_bit (use->related_cands, i);
1961       if (dump_file && (dump_flags & TDF_DETAILS))
1962         fprintf (dump_file, "Candidate %d is related to use %d\n",
1963                  cand->id, use->id);
1964     }
1965
1966   return cand;
1967 }
1968
1969 /* Returns true if incrementing the induction variable at the end of the LOOP
1970    is allowed.
1971
1972    The purpose is to avoid splitting latch edge with a biv increment, thus
1973    creating a jump, possibly confusing other optimization passes and leaving
1974    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
1975    is not available (so we do not have a better alternative), or if the latch
1976    edge is already nonempty.  */
1977
1978 static bool
1979 allow_ip_end_pos_p (struct loop *loop)
1980 {
1981   if (!ip_normal_pos (loop))
1982     return true;
1983
1984   if (!empty_block_p (ip_end_pos (loop)))
1985     return true;
1986
1987   return false;
1988 }
1989
1990 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1991    position to POS.  If USE is not NULL, the candidate is set as related to
1992    it.  The candidate computation is scheduled on all available positions.  */
1993
1994 static void
1995 add_candidate (struct ivopts_data *data,
1996                tree base, tree step, bool important, struct iv_use *use)
1997 {
1998   if (ip_normal_pos (data->current_loop))
1999     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2000   if (ip_end_pos (data->current_loop)
2001       && allow_ip_end_pos_p (data->current_loop))
2002     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2003 }
2004
2005 /* Add a standard "0 + 1 * iteration" iv candidate for a
2006    type with SIZE bits.  */
2007
2008 static void
2009 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2010                                      unsigned int size)
2011 {
2012   tree type = lang_hooks.types.type_for_size (size, true);
2013   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2014                  true, NULL);
2015 }
2016
2017 /* Adds standard iv candidates.  */
2018
2019 static void
2020 add_standard_iv_candidates (struct ivopts_data *data)
2021 {
2022   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2023
2024   /* The same for a double-integer type if it is still fast enough.  */
2025   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2026     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2027 }
2028
2029
2030 /* Adds candidates bases on the old induction variable IV.  */
2031
2032 static void
2033 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2034 {
2035   tree phi, def;
2036   struct iv_cand *cand;
2037
2038   add_candidate (data, iv->base, iv->step, true, NULL);
2039
2040   /* The same, but with initial value zero.  */
2041   add_candidate (data,
2042                  build_int_cst (TREE_TYPE (iv->base), 0),
2043                  iv->step, true, NULL);
2044
2045   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2046   if (TREE_CODE (phi) == PHI_NODE)
2047     {
2048       /* Additionally record the possibility of leaving the original iv
2049          untouched.  */
2050       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2051       cand = add_candidate_1 (data,
2052                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2053                               SSA_NAME_DEF_STMT (def));
2054       cand->var_before = iv->ssa_name;
2055       cand->var_after = def;
2056     }
2057 }
2058
2059 /* Adds candidates based on the old induction variables.  */
2060
2061 static void
2062 add_old_ivs_candidates (struct ivopts_data *data)
2063 {
2064   unsigned i;
2065   struct iv *iv;
2066   bitmap_iterator bi;
2067
2068   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2069     {
2070       iv = ver_info (data, i)->iv;
2071       if (iv && iv->biv_p && !zero_p (iv->step))
2072         add_old_iv_candidates (data, iv);
2073     }
2074 }
2075
2076 /* Adds candidates based on the value of the induction variable IV and USE.  */
2077
2078 static void
2079 add_iv_value_candidates (struct ivopts_data *data,
2080                          struct iv *iv, struct iv_use *use)
2081 {
2082   add_candidate (data, iv->base, iv->step, false, use);
2083
2084   /* The same, but with initial value zero.  */
2085   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2086                  iv->step, false, use);
2087 }
2088
2089 /* Adds candidates based on the address IV and USE.  */
2090
2091 static void
2092 add_address_candidates (struct ivopts_data *data,
2093                         struct iv *iv, struct iv_use *use)
2094 {
2095   tree base, abase;
2096   unsigned HOST_WIDE_INT offset;
2097
2098   /* First, the trivial choices.  */
2099   add_iv_value_candidates (data, iv, use);
2100
2101   /* Second, try removing the COMPONENT_REFs.  */
2102   if (TREE_CODE (iv->base) == ADDR_EXPR)
2103     {
2104       base = TREE_OPERAND (iv->base, 0);
2105       while (TREE_CODE (base) == COMPONENT_REF
2106              || (TREE_CODE (base) == ARRAY_REF
2107                  && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
2108         base = TREE_OPERAND (base, 0);
2109
2110       if (base != TREE_OPERAND (iv->base, 0))
2111         {
2112           gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
2113           gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
2114
2115           if (TREE_CODE (base) == INDIRECT_REF)
2116             base = TREE_OPERAND (base, 0);
2117           else
2118             base = build_addr (base);
2119           add_candidate (data, base, iv->step, false, use);
2120         }
2121     }
2122
2123   /* Third, try removing the constant offset.  */
2124   abase = iv->base;
2125   base = strip_offset (abase, false, &offset);
2126   if (offset)
2127     add_candidate (data, base, iv->step, false, use);
2128 }
2129
2130 /* Possibly adds pseudocandidate for replacing the final value of USE by
2131    a direct computation.  */
2132
2133 static void
2134 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2135 {
2136   struct tree_niter_desc *niter;
2137
2138   /* We must know where we exit the loop and how many times does it roll.  */
2139   niter = niter_for_single_dom_exit (data);
2140   if (!niter
2141       || !zero_p (niter->may_be_zero))
2142     return;
2143
2144   add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2145 }
2146
2147 /* Adds candidates based on the uses.  */
2148
2149 static void
2150 add_derived_ivs_candidates (struct ivopts_data *data)
2151 {
2152   unsigned i;
2153
2154   for (i = 0; i < n_iv_uses (data); i++)
2155     {
2156       struct iv_use *use = iv_use (data, i);
2157
2158       if (!use)
2159         continue;
2160
2161       switch (use->type)
2162         {
2163         case USE_NONLINEAR_EXPR:
2164         case USE_COMPARE:
2165           /* Just add the ivs based on the value of the iv used here.  */
2166           add_iv_value_candidates (data, use->iv, use);
2167           break;
2168
2169         case USE_OUTER:
2170           add_iv_value_candidates (data, use->iv, use);
2171
2172           /* Additionally, add the pseudocandidate for the possibility to
2173              replace the final value by a direct computation.  */
2174           add_iv_outer_candidates (data, use);
2175           break;
2176
2177         case USE_ADDRESS:
2178           add_address_candidates (data, use->iv, use);
2179           break;
2180
2181         default:
2182           gcc_unreachable ();
2183         }
2184     }
2185 }
2186
2187 /* Record important candidates and add them to related_cands bitmaps
2188    if needed.  */
2189
2190 static void
2191 record_important_candidates (struct ivopts_data *data)
2192 {
2193   unsigned i;
2194   struct iv_use *use;
2195
2196   for (i = 0; i < n_iv_cands (data); i++)
2197     {
2198       struct iv_cand *cand = iv_cand (data, i);
2199
2200       if (cand->important)
2201         bitmap_set_bit (data->important_candidates, i);
2202     }
2203
2204   data->consider_all_candidates = (n_iv_cands (data)
2205                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2206
2207   if (data->consider_all_candidates)
2208     {
2209       /* We will not need "related_cands" bitmaps in this case,
2210          so release them to decrease peak memory consumption.  */
2211       for (i = 0; i < n_iv_uses (data); i++)
2212         {
2213           use = iv_use (data, i);
2214           BITMAP_FREE (use->related_cands);
2215         }
2216     }
2217   else
2218     {
2219       /* Add important candidates to the related_cands bitmaps.  */
2220       for (i = 0; i < n_iv_uses (data); i++)
2221         bitmap_ior_into (iv_use (data, i)->related_cands,
2222                          data->important_candidates);
2223     }
2224 }
2225
2226 /* Finds the candidates for the induction variables.  */
2227
2228 static void
2229 find_iv_candidates (struct ivopts_data *data)
2230 {
2231   /* Add commonly used ivs.  */
2232   add_standard_iv_candidates (data);
2233
2234   /* Add old induction variables.  */
2235   add_old_ivs_candidates (data);
2236
2237   /* Add induction variables derived from uses.  */
2238   add_derived_ivs_candidates (data);
2239
2240   /* Record the important candidates.  */
2241   record_important_candidates (data);
2242 }
2243
2244 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2245    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2246    we allocate a simple list to every use.  */
2247
2248 static void
2249 alloc_use_cost_map (struct ivopts_data *data)
2250 {
2251   unsigned i, size, s, j;
2252
2253   for (i = 0; i < n_iv_uses (data); i++)
2254     {
2255       struct iv_use *use = iv_use (data, i);
2256       bitmap_iterator bi;
2257
2258       if (data->consider_all_candidates)
2259         size = n_iv_cands (data);
2260       else
2261         {
2262           s = 0;
2263           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2264             {
2265               s++;
2266             }
2267
2268           /* Round up to the power of two, so that moduling by it is fast.  */
2269           for (size = 1; size < s; size <<= 1)
2270             continue;
2271         }
2272
2273       use->n_map_members = size;
2274       use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2275     }
2276 }
2277
2278 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2279    on invariants DEPENDS_ON.  */
2280
2281 static void
2282 set_use_iv_cost (struct ivopts_data *data,
2283                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2284                  bitmap depends_on)
2285 {
2286   unsigned i, s;
2287
2288   if (cost == INFTY)
2289     {
2290       BITMAP_FREE (depends_on);
2291       return;
2292     }
2293
2294   if (data->consider_all_candidates)
2295     {
2296       use->cost_map[cand->id].cand = cand;
2297       use->cost_map[cand->id].cost = cost;
2298       use->cost_map[cand->id].depends_on = depends_on;
2299       return;
2300     }
2301
2302   /* n_map_members is a power of two, so this computes modulo.  */
2303   s = cand->id & (use->n_map_members - 1);
2304   for (i = s; i < use->n_map_members; i++)
2305     if (!use->cost_map[i].cand)
2306       goto found;
2307   for (i = 0; i < s; i++)
2308     if (!use->cost_map[i].cand)
2309       goto found;
2310
2311   gcc_unreachable ();
2312
2313 found:
2314   use->cost_map[i].cand = cand;
2315   use->cost_map[i].cost = cost;
2316   use->cost_map[i].depends_on = depends_on;
2317 }
2318
2319 /* Gets cost of (USE, CANDIDATE) pair.  */
2320
2321 static struct cost_pair *
2322 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2323                  struct iv_cand *cand)
2324 {
2325   unsigned i, s;
2326   struct cost_pair *ret;
2327
2328   if (!cand)
2329     return NULL;
2330
2331   if (data->consider_all_candidates)
2332     {
2333       ret = use->cost_map + cand->id;
2334       if (!ret->cand)
2335         return NULL;
2336
2337       return ret;
2338     }
2339
2340   /* n_map_members is a power of two, so this computes modulo.  */
2341   s = cand->id & (use->n_map_members - 1);
2342   for (i = s; i < use->n_map_members; i++)
2343     if (use->cost_map[i].cand == cand)
2344       return use->cost_map + i;
2345
2346   for (i = 0; i < s; i++)
2347     if (use->cost_map[i].cand == cand)
2348       return use->cost_map + i;
2349
2350   return NULL;
2351 }
2352
2353 /* Returns estimate on cost of computing SEQ.  */
2354
2355 static unsigned
2356 seq_cost (rtx seq)
2357 {
2358   unsigned cost = 0;
2359   rtx set;
2360
2361   for (; seq; seq = NEXT_INSN (seq))
2362     {
2363       set = single_set (seq);
2364       if (set)
2365         cost += rtx_cost (set, SET);
2366       else
2367         cost++;
2368     }
2369
2370   return cost;
2371 }
2372
2373 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2374 static rtx
2375 produce_memory_decl_rtl (tree obj, int *regno)
2376 {
2377   rtx x;
2378   if (!obj)
2379     abort ();
2380   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2381     {
2382       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2383       x = gen_rtx_SYMBOL_REF (Pmode, name);
2384     }
2385   else
2386     x = gen_raw_REG (Pmode, (*regno)++);
2387
2388   return gen_rtx_MEM (DECL_MODE (obj), x);
2389 }
2390
2391 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2392    walk_tree.  DATA contains the actual fake register number.  */
2393
2394 static tree
2395 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2396 {
2397   tree obj = NULL_TREE;
2398   rtx x = NULL_RTX;
2399   int *regno = data;
2400
2401   switch (TREE_CODE (*expr_p))
2402     {
2403     case ADDR_EXPR:
2404       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2405            handled_component_p (*expr_p);
2406            expr_p = &TREE_OPERAND (*expr_p, 0))
2407         continue;
2408       obj = *expr_p;
2409       if (DECL_P (obj))
2410         x = produce_memory_decl_rtl (obj, regno);
2411       break;
2412
2413     case SSA_NAME:
2414       *ws = 0;
2415       obj = SSA_NAME_VAR (*expr_p);
2416       if (!DECL_RTL_SET_P (obj))
2417         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2418       break;
2419
2420     case VAR_DECL:
2421     case PARM_DECL:
2422     case RESULT_DECL:
2423       *ws = 0;
2424       obj = *expr_p;
2425
2426       if (DECL_RTL_SET_P (obj))
2427         break;
2428
2429       if (DECL_MODE (obj) == BLKmode)
2430         x = produce_memory_decl_rtl (obj, regno);
2431       else
2432         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2433
2434       break;
2435
2436     default:
2437       break;
2438     }
2439
2440   if (x)
2441     {
2442       VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2443       SET_DECL_RTL (obj, x);
2444     }
2445
2446   return NULL_TREE;
2447 }
2448
2449 /* Determines cost of the computation of EXPR.  */
2450
2451 static unsigned
2452 computation_cost (tree expr)
2453 {
2454   rtx seq, rslt;
2455   tree type = TREE_TYPE (expr);
2456   unsigned cost;
2457   /* Avoid using hard regs in ways which may be unsupported.  */
2458   int regno = LAST_VIRTUAL_REGISTER + 1;
2459
2460   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2461   start_sequence ();
2462   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2463   seq = get_insns ();
2464   end_sequence ();
2465
2466   cost = seq_cost (seq);
2467   if (GET_CODE (rslt) == MEM)
2468     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2469
2470   return cost;
2471 }
2472
2473 /* Returns variable containing the value of candidate CAND at statement AT.  */
2474
2475 static tree
2476 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2477 {
2478   if (stmt_after_increment (loop, cand, stmt))
2479     return cand->var_after;
2480   else
2481     return cand->var_before;
2482 }
2483
2484 /* Determines the expression by that USE is expressed from induction variable
2485    CAND at statement AT in LOOP.  */
2486
2487 static tree
2488 get_computation_at (struct loop *loop,
2489                     struct iv_use *use, struct iv_cand *cand, tree at)
2490 {
2491   tree ubase = use->iv->base;
2492   tree ustep = use->iv->step;
2493   tree cbase = cand->iv->base;
2494   tree cstep = cand->iv->step;
2495   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2496   tree uutype;
2497   tree expr, delta;
2498   tree ratio;
2499   unsigned HOST_WIDE_INT ustepi, cstepi;
2500   HOST_WIDE_INT ratioi;
2501
2502   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2503     {
2504       /* We do not have a precision to express the values of use.  */
2505       return NULL_TREE;
2506     }
2507
2508   expr = var_at_stmt (loop, cand, at);
2509
2510   if (TREE_TYPE (expr) != ctype)
2511     {
2512       /* This may happen with the original ivs.  */
2513       expr = fold_convert (ctype, expr);
2514     }
2515
2516   if (TYPE_UNSIGNED (utype))
2517     uutype = utype;
2518   else
2519     {
2520       uutype = unsigned_type_for (utype);
2521       ubase = fold_convert (uutype, ubase);
2522       ustep = fold_convert (uutype, ustep);
2523     }
2524
2525   if (uutype != ctype)
2526     {
2527       expr = fold_convert (uutype, expr);
2528       cbase = fold_convert (uutype, cbase);
2529       cstep = fold_convert (uutype, cstep);
2530     }
2531
2532   if (!cst_and_fits_in_hwi (cstep)
2533       || !cst_and_fits_in_hwi (ustep))
2534     return NULL_TREE;
2535
2536   ustepi = int_cst_value (ustep);
2537   cstepi = int_cst_value (cstep);
2538
2539   if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2540     {
2541       /* TODO maybe consider case when ustep divides cstep and the ratio is
2542          a power of 2 (so that the division is fast to execute)?  We would
2543          need to be much more careful with overflows etc. then.  */
2544       return NULL_TREE;
2545     }
2546
2547   /* We may need to shift the value if we are after the increment.  */
2548   if (stmt_after_increment (loop, cand, at))
2549     cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2550
2551   /* use = ubase - ratio * cbase + ratio * var.
2552
2553      In general case ubase + ratio * (var - cbase) could be better (one less
2554      multiplication), but often it is possible to eliminate redundant parts
2555      of computations from (ubase - ratio * cbase) term, and if it does not
2556      happen, fold is able to apply the distributive law to obtain this form
2557      anyway.  */
2558
2559   if (ratioi == 1)
2560     {
2561       delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2562       expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2563     }
2564   else if (ratioi == -1)
2565     {
2566       delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2567       expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2568     }
2569   else
2570     {
2571       ratio = build_int_cst_type (uutype, ratioi);
2572       delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2573       delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2574       expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2575       expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2576     }
2577
2578   return fold_convert (utype, expr);
2579 }
2580
2581 /* Determines the expression by that USE is expressed from induction variable
2582    CAND in LOOP.  */
2583
2584 static tree
2585 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2586 {
2587   return get_computation_at (loop, use, cand, use->stmt);
2588 }
2589
2590 /* Returns cost of addition in MODE.  */
2591
2592 static unsigned
2593 add_cost (enum machine_mode mode)
2594 {
2595   static unsigned costs[NUM_MACHINE_MODES];
2596   rtx seq;
2597   unsigned cost;
2598
2599   if (costs[mode])
2600     return costs[mode];
2601
2602   start_sequence ();
2603   force_operand (gen_rtx_fmt_ee (PLUS, mode,
2604                                  gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2605                                  gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2606                  NULL_RTX);
2607   seq = get_insns ();
2608   end_sequence ();
2609
2610   cost = seq_cost (seq);
2611   if (!cost)
2612     cost = 1;
2613
2614   costs[mode] = cost;
2615
2616   if (dump_file && (dump_flags & TDF_DETAILS))
2617     fprintf (dump_file, "Addition in %s costs %d\n",
2618              GET_MODE_NAME (mode), cost);
2619   return cost;
2620 }
2621
2622 /* Entry in a hashtable of already known costs for multiplication.  */
2623 struct mbc_entry
2624 {
2625   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
2626   enum machine_mode mode;       /* In mode.  */
2627   unsigned cost;                /* The cost.  */
2628 };
2629
2630 /* Counts hash value for the ENTRY.  */
2631
2632 static hashval_t
2633 mbc_entry_hash (const void *entry)
2634 {
2635   const struct mbc_entry *e = entry;
2636
2637   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2638 }
2639
2640 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2641
2642 static int
2643 mbc_entry_eq (const void *entry1, const void *entry2)
2644 {
2645   const struct mbc_entry *e1 = entry1;
2646   const struct mbc_entry *e2 = entry2;
2647
2648   return (e1->mode == e2->mode
2649           && e1->cst == e2->cst);
2650 }
2651
2652 /* Returns cost of multiplication by constant CST in MODE.  */
2653
2654 static unsigned
2655 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2656 {
2657   static htab_t costs;
2658   struct mbc_entry **cached, act;
2659   rtx seq;
2660   unsigned cost;
2661
2662   if (!costs)
2663     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2664
2665   act.mode = mode;
2666   act.cst = cst;
2667   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2668   if (*cached)
2669     return (*cached)->cost;
2670
2671   *cached = xmalloc (sizeof (struct mbc_entry));
2672   (*cached)->mode = mode;
2673   (*cached)->cst = cst;
2674
2675   start_sequence ();
2676   expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2677                NULL_RTX, 0);
2678   seq = get_insns ();
2679   end_sequence ();
2680
2681   cost = seq_cost (seq);
2682
2683   if (dump_file && (dump_flags & TDF_DETAILS))
2684     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2685              (int) cst, GET_MODE_NAME (mode), cost);
2686
2687   (*cached)->cost = cost;
2688
2689   return cost;
2690 }
2691
2692 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2693    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
2694    variable is omitted.  The created memory accesses MODE.
2695
2696    TODO -- there must be some better way.  This all is quite crude.  */
2697
2698 static unsigned
2699 get_address_cost (bool symbol_present, bool var_present,
2700                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2701 {
2702 #define MAX_RATIO 128
2703   static sbitmap valid_mult;
2704   static HOST_WIDE_INT rat, off;
2705   static HOST_WIDE_INT min_offset, max_offset;
2706   static unsigned costs[2][2][2][2];
2707   unsigned cost, acost;
2708   rtx seq, addr, base;
2709   bool offset_p, ratio_p;
2710   rtx reg1;
2711   HOST_WIDE_INT s_offset;
2712   unsigned HOST_WIDE_INT mask;
2713   unsigned bits;
2714
2715   if (!valid_mult)
2716     {
2717       HOST_WIDE_INT i;
2718
2719       reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2720
2721       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2722       for (i = 1; i <= 1 << 20; i <<= 1)
2723         {
2724           XEXP (addr, 1) = GEN_INT (i);
2725           if (!memory_address_p (Pmode, addr))
2726             break;
2727         }
2728       max_offset = i >> 1;
2729       off = max_offset;
2730
2731       for (i = 1; i <= 1 << 20; i <<= 1)
2732         {
2733           XEXP (addr, 1) = GEN_INT (-i);
2734           if (!memory_address_p (Pmode, addr))
2735             break;
2736         }
2737       min_offset = -(i >> 1);
2738
2739       if (dump_file && (dump_flags & TDF_DETAILS))
2740         {
2741           fprintf (dump_file, "get_address_cost:\n");
2742           fprintf (dump_file, "  min offset %d\n", (int) min_offset);
2743           fprintf (dump_file, "  max offset %d\n", (int) max_offset);
2744         }
2745
2746       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2747       sbitmap_zero (valid_mult);
2748       rat = 1;
2749       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2750       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2751         {
2752           XEXP (addr, 1) = GEN_INT (i);
2753           if (memory_address_p (Pmode, addr))
2754             {
2755               SET_BIT (valid_mult, i + MAX_RATIO);
2756               rat = i;
2757             }
2758         }
2759
2760       if (dump_file && (dump_flags & TDF_DETAILS))
2761         {
2762           fprintf (dump_file, "  allowed multipliers:");
2763           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2764             if (TEST_BIT (valid_mult, i + MAX_RATIO))
2765               fprintf (dump_file, " %d", (int) i);
2766           fprintf (dump_file, "\n");
2767           fprintf (dump_file, "\n");
2768         }
2769     }
2770
2771   bits = GET_MODE_BITSIZE (Pmode);
2772   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2773   offset &= mask;
2774   if ((offset >> (bits - 1) & 1))
2775     offset |= ~mask;
2776   s_offset = offset;
2777
2778   cost = 0;
2779   offset_p = (s_offset != 0
2780               && min_offset <= s_offset && s_offset <= max_offset);
2781   ratio_p = (ratio != 1
2782              && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2783              && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2784
2785   if (ratio != 1 && !ratio_p)
2786     cost += multiply_by_cost (ratio, Pmode);
2787
2788   if (s_offset && !offset_p && !symbol_present)
2789     {
2790       cost += add_cost (Pmode);
2791       var_present = true;
2792     }
2793
2794   acost = costs[symbol_present][var_present][offset_p][ratio_p];
2795   if (!acost)
2796     {
2797       acost = 0;
2798
2799       addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2800       reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2801       if (ratio_p)
2802         addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2803
2804       if (var_present)
2805         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2806
2807       if (symbol_present)
2808         {
2809           base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2810           if (offset_p)
2811             base = gen_rtx_fmt_e (CONST, Pmode,
2812                                   gen_rtx_fmt_ee (PLUS, Pmode,
2813                                                   base,
2814                                                   GEN_INT (off)));
2815         }
2816       else if (offset_p)
2817         base = GEN_INT (off);
2818       else
2819         base = NULL_RTX;
2820
2821       if (base)
2822         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2823
2824       start_sequence ();
2825       addr = memory_address (Pmode, addr);
2826       seq = get_insns ();
2827       end_sequence ();
2828
2829       acost = seq_cost (seq);
2830       acost += address_cost (addr, Pmode);
2831
2832       if (!acost)
2833         acost = 1;
2834       costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2835     }
2836
2837   return cost + acost;
2838 }
2839
2840 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2841    the bitmap to that we should store it.  */
2842
2843 static struct ivopts_data *fd_ivopts_data;
2844 static tree
2845 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2846 {
2847   bitmap *depends_on = data;
2848   struct version_info *info;
2849
2850   if (TREE_CODE (*expr_p) != SSA_NAME)
2851     return NULL_TREE;
2852   info = name_info (fd_ivopts_data, *expr_p);
2853
2854   if (!info->inv_id || info->has_nonlin_use)
2855     return NULL_TREE;
2856
2857   if (!*depends_on)
2858     *depends_on = BITMAP_ALLOC (NULL);
2859   bitmap_set_bit (*depends_on, info->inv_id);
2860
2861   return NULL_TREE;
2862 }
2863
2864 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
2865    invariants the computation depends on.  */
2866
2867 static unsigned
2868 force_var_cost (struct ivopts_data *data,
2869                 tree expr, bitmap *depends_on)
2870 {
2871   static bool costs_initialized = false;
2872   static unsigned integer_cost;
2873   static unsigned symbol_cost;
2874   static unsigned address_cost;
2875   tree op0, op1;
2876   unsigned cost0, cost1, cost;
2877   enum machine_mode mode;
2878
2879   if (!costs_initialized)
2880     {
2881       tree var = create_tmp_var_raw (integer_type_node, "test_var");
2882       rtx x = gen_rtx_MEM (DECL_MODE (var),
2883                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2884       tree addr;
2885       tree type = build_pointer_type (integer_type_node);
2886
2887       integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2888                                                            2000));
2889
2890       SET_DECL_RTL (var, x);
2891       TREE_STATIC (var) = 1;
2892       addr = build1 (ADDR_EXPR, type, var);
2893       symbol_cost = computation_cost (addr) + 1;
2894
2895       address_cost
2896         = computation_cost (build2 (PLUS_EXPR, type,
2897                                     addr,
2898                                     build_int_cst_type (type, 2000))) + 1;
2899       if (dump_file && (dump_flags & TDF_DETAILS))
2900         {
2901           fprintf (dump_file, "force_var_cost:\n");
2902           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
2903           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
2904           fprintf (dump_file, "  address %d\n", (int) address_cost);
2905           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
2906           fprintf (dump_file, "\n");
2907         }
2908
2909       costs_initialized = true;
2910     }
2911
2912   STRIP_NOPS (expr);
2913
2914   if (depends_on)
2915     {
2916       fd_ivopts_data = data;
2917       walk_tree (&expr, find_depends, depends_on, NULL);
2918     }
2919
2920   if (SSA_VAR_P (expr))
2921     return 0;
2922
2923   if (TREE_INVARIANT (expr))
2924     {
2925       if (TREE_CODE (expr) == INTEGER_CST)
2926         return integer_cost;
2927
2928       if (TREE_CODE (expr) == ADDR_EXPR)
2929         {
2930           tree obj = TREE_OPERAND (expr, 0);
2931
2932           if (TREE_CODE (obj) == VAR_DECL
2933               || TREE_CODE (obj) == PARM_DECL
2934               || TREE_CODE (obj) == RESULT_DECL)
2935             return symbol_cost;
2936         }
2937
2938       return address_cost;
2939     }
2940
2941   switch (TREE_CODE (expr))
2942     {
2943     case PLUS_EXPR:
2944     case MINUS_EXPR:
2945     case MULT_EXPR:
2946       op0 = TREE_OPERAND (expr, 0);
2947       op1 = TREE_OPERAND (expr, 1);
2948       STRIP_NOPS (op0);
2949       STRIP_NOPS (op1);
2950
2951       if (is_gimple_val (op0))
2952         cost0 = 0;
2953       else
2954         cost0 = force_var_cost (data, op0, NULL);
2955
2956       if (is_gimple_val (op1))
2957         cost1 = 0;
2958       else
2959         cost1 = force_var_cost (data, op1, NULL);
2960
2961       break;
2962
2963     default:
2964       /* Just an arbitrary value, FIXME.  */
2965       return target_spill_cost;
2966     }
2967
2968   mode = TYPE_MODE (TREE_TYPE (expr));
2969   switch (TREE_CODE (expr))
2970     {
2971     case PLUS_EXPR:
2972     case MINUS_EXPR:
2973       cost = add_cost (mode);
2974       break;
2975
2976     case MULT_EXPR:
2977       if (cst_and_fits_in_hwi (op0))
2978         cost = multiply_by_cost (int_cst_value (op0), mode);
2979       else if (cst_and_fits_in_hwi (op1))
2980         cost = multiply_by_cost (int_cst_value (op1), mode);
2981       else
2982         return target_spill_cost;
2983       break;
2984
2985     default:
2986       gcc_unreachable ();
2987     }
2988
2989   cost += cost0;
2990   cost += cost1;
2991
2992   /* Bound the cost by target_spill_cost.  The parts of complicated
2993      computations often are either loop invariant or at least can
2994      be shared between several iv uses, so letting this grow without
2995      limits would not give reasonable results.  */
2996   return cost < target_spill_cost ? cost : target_spill_cost;
2997 }
2998
2999 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3000    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3001    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3002    invariants the computation depends on.  */
3003
3004 static unsigned
3005 split_address_cost (struct ivopts_data *data,
3006                     tree addr, bool *symbol_present, bool *var_present,
3007                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3008 {
3009   tree core;
3010   HOST_WIDE_INT bitsize;
3011   HOST_WIDE_INT bitpos;
3012   tree toffset;
3013   enum machine_mode mode;
3014   int unsignedp, volatilep;
3015
3016   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3017                               &unsignedp, &volatilep, false);
3018
3019   if (toffset != 0
3020       || bitpos % BITS_PER_UNIT != 0
3021       || TREE_CODE (core) != VAR_DECL)
3022     {
3023       *symbol_present = false;
3024       *var_present = true;
3025       fd_ivopts_data = data;
3026       walk_tree (&addr, find_depends, depends_on, NULL);
3027       return target_spill_cost;
3028     }
3029
3030   *offset += bitpos / BITS_PER_UNIT;
3031   if (TREE_STATIC (core)
3032       || DECL_EXTERNAL (core))
3033     {
3034       *symbol_present = true;
3035       *var_present = false;
3036       return 0;
3037     }
3038
3039   *symbol_present = false;
3040   *var_present = true;
3041   return 0;
3042 }
3043
3044 /* Estimates cost of expressing difference of addresses E1 - E2 as
3045    var + symbol + offset.  The value of offset is added to OFFSET,
3046    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3047    part is missing.  DEPENDS_ON is a set of the invariants the computation
3048    depends on.  */
3049
3050 static unsigned
3051 ptr_difference_cost (struct ivopts_data *data,
3052                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3053                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3054 {
3055   HOST_WIDE_INT diff = 0;
3056   unsigned cost;
3057
3058   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3059
3060   if (ptr_difference_const (e1, e2, &diff))
3061     {
3062       *offset += diff;
3063       *symbol_present = false;
3064       *var_present = false;
3065       return 0;
3066     }
3067
3068   if (e2 == integer_zero_node)
3069     return split_address_cost (data, TREE_OPERAND (e1, 0),
3070                                symbol_present, var_present, offset, depends_on);
3071
3072   *symbol_present = false;
3073   *var_present = true;
3074
3075   cost = force_var_cost (data, e1, depends_on);
3076   cost += force_var_cost (data, e2, depends_on);
3077   cost += add_cost (Pmode);
3078
3079   return cost;
3080 }
3081
3082 /* Estimates cost of expressing difference E1 - E2 as
3083    var + symbol + offset.  The value of offset is added to OFFSET,
3084    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3085    part is missing.  DEPENDS_ON is a set of the invariants the computation
3086    depends on.  */
3087
3088 static unsigned
3089 difference_cost (struct ivopts_data *data,
3090                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3091                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3092 {
3093   unsigned cost;
3094   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3095   unsigned HOST_WIDE_INT off1, off2;
3096
3097   e1 = strip_offset (e1, false, &off1);
3098   e2 = strip_offset (e2, false, &off2);
3099   *offset += off1 - off2;
3100
3101   STRIP_NOPS (e1);
3102   STRIP_NOPS (e2);
3103
3104   if (TREE_CODE (e1) == ADDR_EXPR)
3105     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3106                                 depends_on);
3107   *symbol_present = false;
3108
3109   if (operand_equal_p (e1, e2, 0))
3110     {
3111       *var_present = false;
3112       return 0;
3113     }
3114   *var_present = true;
3115   if (zero_p (e2))
3116     return force_var_cost (data, e1, depends_on);
3117
3118   if (zero_p (e1))
3119     {
3120       cost = force_var_cost (data, e2, depends_on);
3121       cost += multiply_by_cost (-1, mode);
3122
3123       return cost;
3124     }
3125
3126   cost = force_var_cost (data, e1, depends_on);
3127   cost += force_var_cost (data, e2, depends_on);
3128   cost += add_cost (mode);
3129
3130   return cost;
3131 }
3132
3133 /* Determines the cost of the computation by that USE is expressed
3134    from induction variable CAND.  If ADDRESS_P is true, we just need
3135    to create an address from it, otherwise we want to get it into
3136    register.  A set of invariants we depend on is stored in
3137    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3138
3139 static unsigned
3140 get_computation_cost_at (struct ivopts_data *data,
3141                          struct iv_use *use, struct iv_cand *cand,
3142                          bool address_p, bitmap *depends_on, tree at)
3143 {
3144   tree ubase = use->iv->base, ustep = use->iv->step;
3145   tree cbase, cstep;
3146   tree utype = TREE_TYPE (ubase), ctype;
3147   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3148   HOST_WIDE_INT ratio, aratio;
3149   bool var_present, symbol_present;
3150   unsigned cost = 0, n_sums;
3151
3152   *depends_on = NULL;
3153
3154   /* Only consider real candidates.  */
3155   if (!cand->iv)
3156     return INFTY;
3157
3158   cbase = cand->iv->base;
3159   cstep = cand->iv->step;
3160   ctype = TREE_TYPE (cbase);
3161
3162   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3163     {
3164       /* We do not have a precision to express the values of use.  */
3165       return INFTY;
3166     }
3167
3168   if (address_p)
3169     {
3170       /* Do not try to express address of an object with computation based
3171          on address of a different object.  This may cause problems in rtl
3172          level alias analysis (that does not expect this to be happening,
3173          as this is illegal in C), and would be unlikely to be useful
3174          anyway.  */
3175       if (use->iv->base_object
3176           && cand->iv->base_object
3177           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3178         return INFTY;
3179     }
3180
3181   if (!cst_and_fits_in_hwi (ustep)
3182       || !cst_and_fits_in_hwi (cstep))
3183     return INFTY;
3184
3185   if (TREE_CODE (ubase) == INTEGER_CST
3186       && !cst_and_fits_in_hwi (ubase))
3187     goto fallback;
3188
3189   if (TREE_CODE (cbase) == INTEGER_CST
3190       && !cst_and_fits_in_hwi (cbase))
3191     goto fallback;
3192
3193   ustepi = int_cst_value (ustep);
3194   cstepi = int_cst_value (cstep);
3195
3196   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3197     {
3198       /* TODO -- add direct handling of this case.  */
3199       goto fallback;
3200     }
3201
3202   if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3203     return INFTY;
3204
3205   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3206      or ratio == 1, it is better to handle this like
3207
3208      ubase - ratio * cbase + ratio * var
3209
3210      (also holds in the case ratio == -1, TODO.  */
3211
3212   if (TREE_CODE (cbase) == INTEGER_CST)
3213     {
3214       offset = - ratio * int_cst_value (cbase);
3215       cost += difference_cost (data,
3216                                ubase, integer_zero_node,
3217                                &symbol_present, &var_present, &offset,
3218                                depends_on);
3219     }
3220   else if (ratio == 1)
3221     {
3222       cost += difference_cost (data,
3223                                ubase, cbase,
3224                                &symbol_present, &var_present, &offset,
3225                                depends_on);
3226     }
3227   else
3228     {
3229       cost += force_var_cost (data, cbase, depends_on);
3230       cost += add_cost (TYPE_MODE (ctype));
3231       cost += difference_cost (data,
3232                                ubase, integer_zero_node,
3233                                &symbol_present, &var_present, &offset,
3234                                depends_on);
3235     }
3236
3237   /* If we are after the increment, the value of the candidate is higher by
3238      one iteration.  */
3239   if (stmt_after_increment (data->current_loop, cand, at))
3240     offset -= ratio * cstepi;
3241
3242   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3243      (symbol/var/const parts may be omitted).  If we are looking for an address,
3244      find the cost of addressing this.  */
3245   if (address_p)
3246     return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3247
3248   /* Otherwise estimate the costs for computing the expression.  */
3249   aratio = ratio > 0 ? ratio : -ratio;
3250   if (!symbol_present && !var_present && !offset)
3251     {
3252       if (ratio != 1)
3253         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3254
3255       return cost;
3256     }
3257
3258   if (aratio != 1)
3259     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3260
3261   n_sums = 1;
3262   if (var_present
3263       /* Symbol + offset should be compile-time computable.  */
3264       && (symbol_present || offset))
3265     n_sums++;
3266
3267   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3268
3269 fallback:
3270   {
3271     /* Just get the expression, expand it and measure the cost.  */
3272     tree comp = get_computation_at (data->current_loop, use, cand, at);
3273
3274     if (!comp)
3275       return INFTY;
3276
3277     if (address_p)
3278       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3279
3280     return computation_cost (comp);
3281   }
3282 }
3283
3284 /* Determines the cost of the computation by that USE is expressed
3285    from induction variable CAND.  If ADDRESS_P is true, we just need
3286    to create an address from it, otherwise we want to get it into
3287    register.  A set of invariants we depend on is stored in
3288    DEPENDS_ON.  */
3289
3290 static unsigned
3291 get_computation_cost (struct ivopts_data *data,
3292                       struct iv_use *use, struct iv_cand *cand,
3293                       bool address_p, bitmap *depends_on)
3294 {
3295   return get_computation_cost_at (data,
3296                                   use, cand, address_p, depends_on, use->stmt);
3297 }
3298
3299 /* Determines cost of basing replacement of USE on CAND in a generic
3300    expression.  */
3301
3302 static bool
3303 determine_use_iv_cost_generic (struct ivopts_data *data,
3304                                struct iv_use *use, struct iv_cand *cand)
3305 {
3306   bitmap depends_on;
3307   unsigned cost;
3308
3309   /* The simple case first -- if we need to express value of the preserved
3310      original biv, the cost is 0.  This also prevents us from counting the
3311      cost of increment twice -- once at this use and once in the cost of
3312      the candidate.  */
3313   if (cand->pos == IP_ORIGINAL
3314       && cand->incremented_at == use->stmt)
3315     {
3316       set_use_iv_cost (data, use, cand, 0, NULL);
3317       return true;
3318     }
3319
3320   cost = get_computation_cost (data, use, cand, false, &depends_on);
3321   set_use_iv_cost (data, use, cand, cost, depends_on);
3322
3323   return cost != INFTY;
3324 }
3325
3326 /* Determines cost of basing replacement of USE on CAND in an address.  */
3327
3328 static bool
3329 determine_use_iv_cost_address (struct ivopts_data *data,
3330                                struct iv_use *use, struct iv_cand *cand)
3331 {
3332   bitmap depends_on;
3333   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3334
3335   set_use_iv_cost (data, use, cand, cost, depends_on);
3336
3337   return cost != INFTY;
3338 }
3339
3340 /* Computes value of induction variable IV in iteration NITER.  */
3341
3342 static tree
3343 iv_value (struct iv *iv, tree niter)
3344 {
3345   tree val;
3346   tree type = TREE_TYPE (iv->base);
3347
3348   niter = fold_convert (type, niter);
3349   val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3350
3351   return fold (build2 (PLUS_EXPR, type, iv->base, val));
3352 }
3353
3354 /* Computes value of candidate CAND at position AT in iteration NITER.  */
3355
3356 static tree
3357 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3358 {
3359   tree val = iv_value (cand->iv, niter);
3360   tree type = TREE_TYPE (cand->iv->base);
3361
3362   if (stmt_after_increment (loop, cand, at))
3363     val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3364
3365   return val;
3366 }
3367
3368 /* Returns period of induction variable iv.  */
3369
3370 static tree
3371 iv_period (struct iv *iv)
3372 {
3373   tree step = iv->step, period, type;
3374   tree pow2div;
3375
3376   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3377
3378   /* Period of the iv is gcd (step, type range).  Since type range is power
3379      of two, it suffices to determine the maximum power of two that divides
3380      step.  */
3381   pow2div = num_ending_zeros (step);
3382   type = unsigned_type_for (TREE_TYPE (step));
3383
3384   period = build_low_bits_mask (type,
3385                                 (TYPE_PRECISION (type)
3386                                  - tree_low_cst (pow2div, 1)));
3387
3388   return period;
3389 }
3390
3391 /* Check whether it is possible to express the condition in USE by comparison
3392    of candidate CAND.  If so, store the comparison code to COMPARE and the
3393    value compared with to BOUND.  */
3394
3395 static bool
3396 may_eliminate_iv (struct ivopts_data *data,
3397                   struct iv_use *use, struct iv_cand *cand,
3398                   enum tree_code *compare, tree *bound)
3399 {
3400   basic_block ex_bb;
3401   edge exit;
3402   struct tree_niter_desc *niter;
3403   tree nit, nit_type;
3404   tree wider_type, period, per_type;
3405   struct loop *loop = data->current_loop;
3406
3407   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3408      for other conditions inside loop body.  */
3409   ex_bb = bb_for_stmt (use->stmt);
3410   if (use->stmt != last_stmt (ex_bb)
3411       || TREE_CODE (use->stmt) != COND_EXPR)
3412     return false;
3413   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3414     return false;
3415
3416   exit = EDGE_SUCC (ex_bb, 0);
3417   if (flow_bb_inside_loop_p (loop, exit->dest))
3418     exit = EDGE_SUCC (ex_bb, 1);
3419   if (flow_bb_inside_loop_p (loop, exit->dest))
3420     return false;
3421
3422   niter = niter_for_exit (data, exit);
3423   if (!niter
3424       || !zero_p (niter->may_be_zero))
3425     return false;
3426
3427   nit = niter->niter;
3428   nit_type = TREE_TYPE (nit);
3429
3430   /* Determine whether we may use the variable to test whether niter iterations
3431      elapsed.  This is the case iff the period of the induction variable is
3432      greater than the number of iterations.  */
3433   period = iv_period (cand->iv);
3434   if (!period)
3435     return false;
3436   per_type = TREE_TYPE (period);
3437
3438   wider_type = TREE_TYPE (period);
3439   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3440     wider_type = per_type;
3441   else
3442     wider_type = nit_type;
3443
3444   if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3445                                        fold_convert (wider_type, period),
3446                                        fold_convert (wider_type, nit)))))
3447     return false;
3448
3449   if (exit->flags & EDGE_TRUE_VALUE)
3450     *compare = EQ_EXPR;
3451   else
3452     *compare = NE_EXPR;
3453
3454   *bound = cand_value_at (loop, cand, use->stmt, nit);
3455   return true;
3456 }
3457
3458 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3459
3460 static bool
3461 determine_use_iv_cost_condition (struct ivopts_data *data,
3462                                  struct iv_use *use, struct iv_cand *cand)
3463 {
3464   tree bound;
3465   enum tree_code compare;
3466
3467   /* Only consider real candidates.  */
3468   if (!cand->iv)
3469     {
3470       set_use_iv_cost (data, use, cand, INFTY, NULL);
3471       return false;
3472     }
3473
3474   if (may_eliminate_iv (data, use, cand, &compare, &bound))
3475     {
3476       bitmap depends_on = NULL;
3477       unsigned cost = force_var_cost (data, bound, &depends_on);
3478
3479       set_use_iv_cost (data, use, cand, cost, depends_on);
3480       return cost != INFTY;
3481     }
3482
3483   /* The induction variable elimination failed; just express the original
3484      giv.  If it is compared with an invariant, note that we cannot get
3485      rid of it.  */
3486   if (TREE_CODE (*use->op_p) == SSA_NAME)
3487     record_invariant (data, *use->op_p, true);
3488   else
3489     {
3490       record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3491       record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3492     }
3493
3494   return determine_use_iv_cost_generic (data, use, cand);
3495 }
3496
3497 /* Checks whether it is possible to replace the final value of USE by
3498    a direct computation.  If so, the formula is stored to *VALUE.  */
3499
3500 static bool
3501 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
3502                          tree *value)
3503 {
3504   struct loop *loop = data->current_loop;
3505   edge exit;
3506   struct tree_niter_desc *niter;
3507
3508   exit = single_dom_exit (loop);
3509   if (!exit)
3510     return false;
3511
3512   gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3513                               bb_for_stmt (use->stmt)));
3514
3515   niter = niter_for_single_dom_exit (data);
3516   if (!niter
3517       || !zero_p (niter->may_be_zero))
3518     return false;
3519
3520   *value = iv_value (use->iv, niter->niter);
3521
3522   return true;
3523 }
3524
3525 /* Determines cost of replacing final value of USE using CAND.  */
3526
3527 static bool
3528 determine_use_iv_cost_outer (struct ivopts_data *data,
3529                              struct iv_use *use, struct iv_cand *cand)
3530 {
3531   bitmap depends_on;
3532   unsigned cost;
3533   edge exit;
3534   tree value;
3535   struct loop *loop = data->current_loop;
3536
3537   /* The simple case first -- if we need to express value of the preserved
3538      original biv, the cost is 0.  This also prevents us from counting the
3539      cost of increment twice -- once at this use and once in the cost of
3540      the candidate.  */
3541   if (cand->pos == IP_ORIGINAL
3542       && cand->incremented_at == use->stmt)
3543     {
3544       set_use_iv_cost (data, use, cand, 0, NULL);
3545       return true;
3546     }
3547
3548   if (!cand->iv)
3549     {
3550       if (!may_replace_final_value (data, use, &value))
3551         {
3552           set_use_iv_cost (data, use, cand, INFTY, NULL);
3553           return false;
3554         }
3555
3556       depends_on = NULL;
3557       cost = force_var_cost (data, value, &depends_on);
3558
3559       cost /= AVG_LOOP_NITER (loop);
3560
3561       set_use_iv_cost (data, use, cand, cost, depends_on);
3562       return cost != INFTY;
3563     }
3564
3565   exit = single_dom_exit (loop);
3566   if (exit)
3567     {
3568       /* If there is just a single exit, we may use value of the candidate
3569          after we take it to determine the value of use.  */
3570       cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3571                                       last_stmt (exit->src));
3572       if (cost != INFTY)
3573         cost /= AVG_LOOP_NITER (loop);
3574     }
3575   else
3576     {
3577       /* Otherwise we just need to compute the iv.  */
3578       cost = get_computation_cost (data, use, cand, false, &depends_on);
3579     }
3580
3581   set_use_iv_cost (data, use, cand, cost, depends_on);
3582
3583   return cost != INFTY;
3584 }
3585
3586 /* Determines cost of basing replacement of USE on CAND.  Returns false
3587    if USE cannot be based on CAND.  */
3588
3589 static bool
3590 determine_use_iv_cost (struct ivopts_data *data,
3591                        struct iv_use *use, struct iv_cand *cand)
3592 {
3593   switch (use->type)
3594     {
3595     case USE_NONLINEAR_EXPR:
3596       return determine_use_iv_cost_generic (data, use, cand);
3597
3598     case USE_OUTER:
3599       return determine_use_iv_cost_outer (data, use, cand);
3600
3601     case USE_ADDRESS:
3602       return determine_use_iv_cost_address (data, use, cand);
3603
3604     case USE_COMPARE:
3605       return determine_use_iv_cost_condition (data, use, cand);
3606
3607     default:
3608       gcc_unreachable ();
3609     }
3610 }
3611
3612 /* Determines costs of basing the use of the iv on an iv candidate.  */
3613
3614 static void
3615 determine_use_iv_costs (struct ivopts_data *data)
3616 {
3617   unsigned i, j;
3618   struct iv_use *use;
3619   struct iv_cand *cand;
3620   bitmap to_clear = BITMAP_ALLOC (NULL);
3621
3622   alloc_use_cost_map (data);
3623
3624   for (i = 0; i < n_iv_uses (data); i++)
3625     {
3626       use = iv_use (data, i);
3627
3628       if (data->consider_all_candidates)
3629         {
3630           for (j = 0; j < n_iv_cands (data); j++)
3631             {
3632               cand = iv_cand (data, j);
3633               determine_use_iv_cost (data, use, cand);
3634             }
3635         }
3636       else
3637         {
3638           bitmap_iterator bi;
3639
3640           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3641             {
3642               cand = iv_cand (data, j);
3643               if (!determine_use_iv_cost (data, use, cand))
3644                 bitmap_set_bit (to_clear, j);
3645             }
3646
3647           /* Remove the candidates for that the cost is infinite from
3648              the list of related candidates.  */
3649           bitmap_and_compl_into (use->related_cands, to_clear);
3650           bitmap_clear (to_clear);
3651         }
3652     }
3653
3654   BITMAP_FREE (to_clear);
3655
3656   if (dump_file && (dump_flags & TDF_DETAILS))
3657     {
3658       fprintf (dump_file, "Use-candidate costs:\n");
3659
3660       for (i = 0; i < n_iv_uses (data); i++)
3661         {
3662           use = iv_use (data, i);
3663
3664           fprintf (dump_file, "Use %d:\n", i);
3665           fprintf (dump_file, "  cand\tcost\tdepends on\n");
3666           for (j = 0; j < use->n_map_members; j++)
3667             {
3668               if (!use->cost_map[j].cand
3669                   || use->cost_map[j].cost == INFTY)
3670                 continue;
3671
3672               fprintf (dump_file, "  %d\t%d\t",
3673                        use->cost_map[j].cand->id,
3674                        use->cost_map[j].cost);
3675               if (use->cost_map[j].depends_on)
3676                 bitmap_print (dump_file,
3677                               use->cost_map[j].depends_on, "","");
3678               fprintf (dump_file, "\n");
3679             }
3680
3681           fprintf (dump_file, "\n");
3682         }
3683       fprintf (dump_file, "\n");
3684     }
3685 }
3686
3687 /* Determines cost of the candidate CAND.  */
3688
3689 static void
3690 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3691 {
3692   unsigned cost_base, cost_step;
3693   tree base;
3694
3695   if (!cand->iv)
3696     {
3697       cand->cost = 0;
3698       return;
3699     }
3700
3701   /* There are two costs associated with the candidate -- its increment
3702      and its initialization.  The second is almost negligible for any loop
3703      that rolls enough, so we take it just very little into account.  */
3704
3705   base = cand->iv->base;
3706   cost_base = force_var_cost (data, base, NULL);
3707   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3708
3709   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3710
3711   /* Prefer the original iv unless we may gain something by replacing it.  */
3712   if (cand->pos == IP_ORIGINAL)
3713     cand->cost--;
3714
3715   /* Prefer not to insert statements into latch unless there are some
3716      already (so that we do not create unnecessary jumps).  */
3717   if (cand->pos == IP_END
3718       && empty_block_p (ip_end_pos (data->current_loop)))
3719     cand->cost++;
3720 }
3721
3722 /* Determines costs of computation of the candidates.  */
3723
3724 static void
3725 determine_iv_costs (struct ivopts_data *data)
3726 {
3727   unsigned i;
3728
3729   if (dump_file && (dump_flags & TDF_DETAILS))
3730     {
3731       fprintf (dump_file, "Candidate costs:\n");
3732       fprintf (dump_file, "  cand\tcost\n");
3733     }
3734
3735   for (i = 0; i < n_iv_cands (data); i++)
3736     {
3737       struct iv_cand *cand = iv_cand (data, i);
3738
3739       determine_iv_cost (data, cand);
3740
3741       if (dump_file && (dump_flags & TDF_DETAILS))
3742         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
3743     }
3744
3745 if (dump_file && (dump_flags & TDF_DETAILS))
3746       fprintf (dump_file, "\n");
3747 }
3748
3749 /* Calculates cost for having SIZE induction variables.  */
3750
3751 static unsigned
3752 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3753 {
3754   return global_cost_for_size (size,
3755                                loop_data (data->current_loop)->regs_used,
3756                                n_iv_uses (data));
3757 }
3758
3759 /* For each size of the induction variable set determine the penalty.  */
3760
3761 static void
3762 determine_set_costs (struct ivopts_data *data)
3763 {
3764   unsigned j, n;
3765   tree phi, op;
3766   struct loop *loop = data->current_loop;
3767   bitmap_iterator bi;
3768
3769   /* We use the following model (definitely improvable, especially the
3770      cost function -- TODO):
3771
3772      We estimate the number of registers available (using MD data), name it A.
3773
3774      We estimate the number of registers used by the loop, name it U.  This
3775      number is obtained as the number of loop phi nodes (not counting virtual
3776      registers and bivs) + the number of variables from outside of the loop.
3777
3778      We set a reserve R (free regs that are used for temporary computations,
3779      etc.).  For now the reserve is a constant 3.
3780
3781      Let I be the number of induction variables.
3782
3783      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3784         make a lot of ivs without a reason).
3785      -- if A - R < U + I <= A, the cost is I * PRES_COST
3786      -- if U + I > A, the cost is I * PRES_COST and
3787         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
3788
3789   if (dump_file && (dump_flags & TDF_DETAILS))
3790     {
3791       fprintf (dump_file, "Global costs:\n");
3792       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
3793       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
3794       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
3795       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
3796     }
3797
3798   n = 0;
3799   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3800     {
3801       op = PHI_RESULT (phi);
3802
3803       if (!is_gimple_reg (op))
3804         continue;
3805
3806       if (get_iv (data, op))
3807         continue;
3808
3809       n++;
3810     }
3811
3812   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3813     {
3814       struct version_info *info = ver_info (data, j);
3815
3816       if (info->inv_id && info->has_nonlin_use)
3817         n++;
3818     }
3819
3820   loop_data (loop)->regs_used = n;
3821   if (dump_file && (dump_flags & TDF_DETAILS))
3822     fprintf (dump_file, "  regs_used %d\n", n);
3823
3824   if (dump_file && (dump_flags & TDF_DETAILS))
3825     {
3826       fprintf (dump_file, "  cost for size:\n");
3827       fprintf (dump_file, "  ivs\tcost\n");
3828       for (j = 0; j <= 2 * target_avail_regs; j++)
3829         fprintf (dump_file, "  %d\t%d\n", j,
3830                  ivopts_global_cost_for_size (data, j));
3831       fprintf (dump_file, "\n");
3832     }
3833 }
3834
3835 /* Returns true if A is a cheaper cost pair than B.  */
3836
3837 static bool
3838 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3839 {
3840   if (!a)
3841     return false;
3842
3843   if (!b)
3844     return true;
3845
3846   if (a->cost < b->cost)
3847     return true;
3848
3849   if (a->cost > b->cost)
3850     return false;
3851
3852   /* In case the costs are the same, prefer the cheaper candidate.  */
3853   if (a->cand->cost < b->cand->cost)
3854     return true;
3855
3856   return false;
3857 }
3858
3859 /* Computes the cost field of IVS structure.  */
3860
3861 static void
3862 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3863 {
3864   unsigned cost = 0;
3865
3866   cost += ivs->cand_use_cost;
3867   cost += ivs->cand_cost;
3868   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3869
3870   ivs->cost = cost;
3871 }
3872
3873 /* Set USE not to be expressed by any candidate in IVS.  */
3874
3875 static void
3876 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
3877                  struct iv_use *use)
3878 {
3879   unsigned uid = use->id, cid, iid;
3880   bitmap deps;
3881   struct cost_pair *cp;
3882   bitmap_iterator bi;
3883
3884   cp = ivs->cand_for_use[uid];
3885   if (!cp)
3886     return;
3887   cid = cp->cand->id;
3888
3889   ivs->bad_uses++;
3890   ivs->cand_for_use[uid] = NULL;
3891   ivs->n_cand_uses[cid]--;
3892
3893   if (ivs->n_cand_uses[cid] == 0)
3894     {
3895       bitmap_clear_bit (ivs->cands, cid);
3896       /* Do not count the pseudocandidates.  */
3897       if (cp->cand->iv)
3898         ivs->n_regs--;
3899       ivs->n_cands--;
3900       ivs->cand_cost -= cp->cand->cost;
3901     }
3902
3903   ivs->cand_use_cost -= cp->cost;
3904
3905   deps = cp->depends_on;
3906
3907   if (deps)
3908     {
3909       EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3910         {
3911           ivs->n_invariant_uses[iid]--;
3912           if (ivs->n_invariant_uses[iid] == 0)
3913             ivs->n_regs--;
3914         }
3915     }
3916
3917   iv_ca_recount_cost (data, ivs);
3918 }
3919
3920 /* Set cost pair for USE in set IVS to CP.  */
3921
3922 static void
3923 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
3924               struct iv_use *use, struct cost_pair *cp)
3925 {
3926   unsigned uid = use->id, cid, iid;
3927   bitmap deps;
3928   bitmap_iterator bi;
3929
3930   if (ivs->cand_for_use[uid] == cp)
3931     return;
3932
3933   if (ivs->cand_for_use[uid])
3934     iv_ca_set_no_cp (data, ivs, use);
3935
3936   if (cp)
3937     {
3938       cid = cp->cand->id;
3939
3940       ivs->bad_uses--;
3941       ivs->cand_for_use[uid] = cp;
3942       ivs->n_cand_uses[cid]++;
3943       if (ivs->n_cand_uses[cid] == 1)
3944         {
3945           bitmap_set_bit (ivs->cands, cid);
3946           /* Do not count the pseudocandidates.  */
3947           if (cp->cand->iv)
3948             ivs->n_regs++;
3949           ivs->n_cands++;
3950           ivs->cand_cost += cp->cand->cost;
3951         }
3952
3953       ivs->cand_use_cost += cp->cost;
3954
3955       deps = cp->depends_on;
3956
3957       if (deps)
3958         {
3959           EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3960             {
3961               ivs->n_invariant_uses[iid]++;
3962               if (ivs->n_invariant_uses[iid] == 1)
3963                 ivs->n_regs++;
3964             }
3965         }
3966
3967       iv_ca_recount_cost (data, ivs);
3968     }
3969 }
3970
3971 /* Extend set IVS by expressing USE by some of the candidates in it
3972    if possible.  */
3973
3974 static void
3975 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
3976                struct iv_use *use)
3977 {
3978   struct cost_pair *best_cp = NULL, *cp;
3979   bitmap_iterator bi;
3980   unsigned i;
3981
3982   gcc_assert (ivs->upto >= use->id);
3983
3984   if (ivs->upto == use->id)
3985     {
3986       ivs->upto++;
3987       ivs->bad_uses++;
3988     }
3989
3990   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
3991     {
3992       cp = get_use_iv_cost (data, use, iv_cand (data, i));
3993
3994       if (cheaper_cost_pair (cp, best_cp))
3995         best_cp = cp;
3996     }
3997
3998   iv_ca_set_cp (data, ivs, use, best_cp);
3999 }
4000
4001 /* Get cost for assignment IVS.  */
4002
4003 static unsigned
4004 iv_ca_cost (struct iv_ca *ivs)
4005 {
4006   return (ivs->bad_uses ? INFTY : ivs->cost);
4007 }
4008
4009 /* Returns true if all dependences of CP are among invariants in IVS.  */
4010
4011 static bool
4012 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4013 {
4014   unsigned i;
4015   bitmap_iterator bi;
4016
4017   if (!cp->depends_on)
4018     return true;
4019
4020   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4021     {
4022       if (ivs->n_invariant_uses[i] == 0)
4023         return false;
4024     }
4025
4026   return true;
4027 }
4028
4029 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4030    it before NEXT_CHANGE.  */
4031
4032 static struct iv_ca_delta *
4033 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4034                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4035 {
4036   struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4037
4038   change->use = use;
4039   change->old_cp = old_cp;
4040   change->new_cp = new_cp;
4041   change->next_change = next_change;
4042
4043   return change;
4044 }
4045
4046 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4047    are rewritten.   */
4048
4049 static struct iv_ca_delta *
4050 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4051 {
4052   struct iv_ca_delta *last;
4053
4054   if (!l2)
4055     return l1;
4056
4057   if (!l1)
4058     return l2;
4059
4060   for (last = l1; last->next_change; last = last->next_change)
4061     continue;
4062   last->next_change = l2;
4063
4064   return l1;
4065 }
4066
4067 /* Returns candidate by that USE is expressed in IVS.  */
4068
4069 static struct cost_pair *
4070 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4071 {
4072   return ivs->cand_for_use[use->id];
4073 }
4074
4075 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4076
4077 static struct iv_ca_delta *
4078 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4079 {
4080   struct iv_ca_delta *act, *next, *prev = NULL;
4081   struct cost_pair *tmp;
4082
4083   for (act = delta; act; act = next)
4084     {
4085       next = act->next_change;
4086       act->next_change = prev;
4087       prev = act;
4088
4089       tmp = act->old_cp;
4090       act->old_cp = act->new_cp;
4091       act->new_cp = tmp;
4092     }
4093
4094   return prev;
4095 }
4096
4097 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4098    reverted instead.  */
4099
4100 static void
4101 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4102                     struct iv_ca_delta *delta, bool forward)
4103 {
4104   struct cost_pair *from, *to;
4105   struct iv_ca_delta *act;
4106
4107   if (!forward)
4108     delta = iv_ca_delta_reverse (delta);
4109
4110   for (act = delta; act; act = act->next_change)
4111     {
4112       from = act->old_cp;
4113       to = act->new_cp;
4114       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4115       iv_ca_set_cp (data, ivs, act->use, to);
4116     }
4117
4118   if (!forward)
4119     iv_ca_delta_reverse (delta);
4120 }
4121
4122 /* Returns true if CAND is used in IVS.  */
4123
4124 static bool
4125 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4126 {
4127   return ivs->n_cand_uses[cand->id] > 0;
4128 }
4129
4130 /* Returns number of induction variable candidates in the set IVS.  */
4131
4132 static unsigned
4133 iv_ca_n_cands (struct iv_ca *ivs)
4134 {
4135   return ivs->n_cands;
4136 }
4137
4138 /* Free the list of changes DELTA.  */
4139
4140 static void
4141 iv_ca_delta_free (struct iv_ca_delta **delta)
4142 {
4143   struct iv_ca_delta *act, *next;
4144
4145   for (act = *delta; act; act = next)
4146     {
4147       next = act->next_change;
4148       free (act);
4149     }
4150
4151   *delta = NULL;
4152 }
4153
4154 /* Allocates new iv candidates assignment.  */
4155
4156 static struct iv_ca *
4157 iv_ca_new (struct ivopts_data *data)
4158 {
4159   struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4160
4161   nw->upto = 0;
4162   nw->bad_uses = 0;
4163   nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4164   nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4165   nw->cands = BITMAP_ALLOC (NULL);
4166   nw->n_cands = 0;
4167   nw->n_regs = 0;
4168   nw->cand_use_cost = 0;
4169   nw->cand_cost = 0;
4170   nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4171   nw->cost = 0;
4172
4173   return nw;
4174 }
4175
4176 /* Free memory occupied by the set IVS.  */
4177
4178 static void
4179 iv_ca_free (struct iv_ca **ivs)
4180 {
4181   free ((*ivs)->cand_for_use);
4182   free ((*ivs)->n_cand_uses);
4183   BITMAP_FREE ((*ivs)->cands);
4184   free ((*ivs)->n_invariant_uses);
4185   free (*ivs);
4186   *ivs = NULL;
4187 }
4188
4189 /* Dumps IVS to FILE.  */
4190
4191 static void
4192 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4193 {
4194   const char *pref = "  invariants ";
4195   unsigned i;
4196
4197   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4198   bitmap_print (file, ivs->cands, "  candidates ","\n");
4199
4200   for (i = 1; i <= data->max_inv_id; i++)
4201     if (ivs->n_invariant_uses[i])
4202       {
4203         fprintf (file, "%s%d", pref, i);
4204         pref = ", ";
4205       }
4206   fprintf (file, "\n");
4207 }
4208
4209 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4210    new set, and store differences in DELTA.  Number of induction variables
4211    in the new set is stored to N_IVS.  */
4212
4213 static unsigned
4214 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4215               struct iv_cand *cand, struct iv_ca_delta **delta,
4216               unsigned *n_ivs)
4217 {
4218   unsigned i, cost;
4219   struct iv_use *use;
4220   struct cost_pair *old_cp, *new_cp;
4221
4222   *delta = NULL;
4223   for (i = 0; i < ivs->upto; i++)
4224     {
4225       use = iv_use (data, i);
4226       old_cp = iv_ca_cand_for_use (ivs, use);
4227
4228       if (old_cp
4229           && old_cp->cand == cand)
4230         continue;
4231
4232       new_cp = get_use_iv_cost (data, use, cand);
4233       if (!new_cp)
4234         continue;
4235
4236       if (!iv_ca_has_deps (ivs, new_cp))
4237         continue;
4238
4239       if (!cheaper_cost_pair (new_cp, old_cp))
4240         continue;
4241
4242       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4243     }
4244
4245   iv_ca_delta_commit (data, ivs, *delta, true);
4246   cost = iv_ca_cost (ivs);
4247   if (n_ivs)
4248     *n_ivs = iv_ca_n_cands (ivs);
4249   iv_ca_delta_commit (data, ivs, *delta, false);
4250
4251   return cost;
4252 }
4253
4254 /* Try narrowing set IVS by removing CAND.  Return the cost of
4255    the new set and store the differences in DELTA.  */
4256
4257 static unsigned
4258 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4259               struct iv_cand *cand, struct iv_ca_delta **delta)
4260 {
4261   unsigned i, ci;
4262   struct iv_use *use;
4263   struct cost_pair *old_cp, *new_cp, *cp;
4264   bitmap_iterator bi;
4265   struct iv_cand *cnd;
4266   unsigned cost;
4267
4268   *delta = NULL;
4269   for (i = 0; i < n_iv_uses (data); i++)
4270     {
4271       use = iv_use (data, i);
4272
4273       old_cp = iv_ca_cand_for_use (ivs, use);
4274       if (old_cp->cand != cand)
4275         continue;
4276
4277       new_cp = NULL;
4278
4279       if (data->consider_all_candidates)
4280         {
4281           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4282             {
4283               if (ci == cand->id)
4284                 continue;
4285
4286               cnd = iv_cand (data, ci);
4287
4288               cp = get_use_iv_cost (data, use, cnd);
4289               if (!cp)
4290                 continue;
4291               if (!iv_ca_has_deps (ivs, cp))
4292                 continue;
4293
4294               if (!cheaper_cost_pair (cp, new_cp))
4295                 continue;
4296
4297               new_cp = cp;
4298             }
4299         }
4300       else
4301         {
4302           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4303             {
4304               if (ci == cand->id)
4305                 continue;
4306
4307               cnd = iv_cand (data, ci);
4308
4309               cp = get_use_iv_cost (data, use, cnd);
4310               if (!cp)
4311                 continue;
4312               if (!iv_ca_has_deps (ivs, cp))
4313                 continue;
4314
4315               if (!cheaper_cost_pair (cp, new_cp))
4316                 continue;
4317
4318               new_cp = cp;
4319             }
4320         }
4321
4322       if (!new_cp)
4323         {
4324           iv_ca_delta_free (delta);
4325           return INFTY;
4326         }
4327
4328       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4329     }
4330
4331   iv_ca_delta_commit (data, ivs, *delta, true);
4332   cost = iv_ca_cost (ivs);
4333   iv_ca_delta_commit (data, ivs, *delta, false);
4334
4335   return cost;
4336 }
4337
4338 /* Try optimizing the set of candidates IVS by removing candidates different
4339    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4340    differences in DELTA.  */
4341
4342 static unsigned
4343 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4344              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4345 {
4346   bitmap_iterator bi;
4347   struct iv_ca_delta *act_delta, *best_delta;
4348   unsigned i, best_cost, acost;
4349   struct iv_cand *cand;
4350
4351   best_delta = NULL;
4352   best_cost = iv_ca_cost (ivs);
4353
4354   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4355     {
4356       cand = iv_cand (data, i);
4357
4358       if (cand == except_cand)
4359         continue;
4360
4361       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4362
4363       if (acost < best_cost)
4364         {
4365           best_cost = acost;
4366           iv_ca_delta_free (&best_delta);
4367           best_delta = act_delta;
4368         }
4369       else
4370         iv_ca_delta_free (&act_delta);
4371     }
4372
4373   if (!best_delta)
4374     {
4375       *delta = NULL;
4376       return best_cost;
4377     }
4378
4379   /* Recurse to possibly remove other unnecessary ivs.  */
4380   iv_ca_delta_commit (data, ivs, best_delta, true);
4381   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4382   iv_ca_delta_commit (data, ivs, best_delta, false);
4383   *delta = iv_ca_delta_join (best_delta, *delta);
4384   return best_cost;
4385 }
4386
4387 /* Tries to extend the sets IVS in the best possible way in order
4388    to express the USE.  */
4389
4390 static bool
4391 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4392                   struct iv_use *use)
4393 {
4394   unsigned best_cost, act_cost;
4395   unsigned i;
4396   bitmap_iterator bi;
4397   struct iv_cand *cand;
4398   struct iv_ca_delta *best_delta = NULL, *act_delta;
4399   struct cost_pair *cp;
4400
4401   iv_ca_add_use (data, ivs, use);
4402   best_cost = iv_ca_cost (ivs);
4403
4404   cp = iv_ca_cand_for_use (ivs, use);
4405   if (cp)
4406     {
4407       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4408       iv_ca_set_no_cp (data, ivs, use);
4409     }
4410
4411   /* First try important candidates.  Only if it fails, try the specific ones.
4412      Rationale -- in loops with many variables the best choice often is to use
4413      just one generic biv.  If we added here many ivs specific to the uses,
4414      the optimization algorithm later would be likely to get stuck in a local
4415      minimum, thus causing us to create too many ivs.  The approach from
4416      few ivs to more seems more likely to be successful -- starting from few
4417      ivs, replacing an expensive use by a specific iv should always be a
4418      win.  */
4419   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4420     {
4421       cand = iv_cand (data, i);
4422
4423       if (iv_ca_cand_used_p (ivs, cand))
4424         continue;
4425
4426       cp = get_use_iv_cost (data, use, cand);
4427       if (!cp)
4428         continue;
4429
4430       iv_ca_set_cp (data, ivs, use, cp);
4431       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4432       iv_ca_set_no_cp (data, ivs, use);
4433       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4434
4435       if (act_cost < best_cost)
4436         {
4437           best_cost = act_cost;
4438
4439           iv_ca_delta_free (&best_delta);
4440           best_delta = act_delta;
4441         }
4442       else
4443         iv_ca_delta_free (&act_delta);
4444     }
4445
4446   if (best_cost == INFTY)
4447     {
4448       for (i = 0; i < use->n_map_members; i++)
4449         {
4450           cp = use->cost_map + i;
4451           cand = cp->cand;
4452           if (!cand)
4453             continue;
4454
4455           /* Already tried this.  */
4456           if (cand->important)
4457             continue;
4458
4459           if (iv_ca_cand_used_p (ivs, cand))
4460             continue;
4461
4462           act_delta = NULL;
4463           iv_ca_set_cp (data, ivs, use, cp);
4464           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4465           iv_ca_set_no_cp (data, ivs, use);
4466           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4467                                        cp, act_delta);
4468
4469           if (act_cost < best_cost)
4470             {
4471               best_cost = act_cost;
4472
4473               if (best_delta)
4474                 iv_ca_delta_free (&best_delta);
4475               best_delta = act_delta;
4476             }
4477           else
4478             iv_ca_delta_free (&act_delta);
4479         }
4480     }
4481
4482   iv_ca_delta_commit (data, ivs, best_delta, true);
4483   iv_ca_delta_free (&best_delta);
4484
4485   return (best_cost != INFTY);
4486 }
4487
4488 /* Finds an initial assignment of candidates to uses.  */
4489
4490 static struct iv_ca *
4491 get_initial_solution (struct ivopts_data *data)
4492 {
4493   struct iv_ca *ivs = iv_ca_new (data);
4494   unsigned i;
4495
4496   for (i = 0; i < n_iv_uses (data); i++)
4497     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4498       {
4499         iv_ca_free (&ivs);
4500         return NULL;
4501       }
4502
4503   return ivs;
4504 }
4505
4506 /* Tries to improve set of induction variables IVS.  */
4507
4508 static bool
4509 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4510 {
4511   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4512   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4513   struct iv_cand *cand;
4514
4515   /* Try extending the set of induction variables by one.  */
4516   for (i = 0; i < n_iv_cands (data); i++)
4517     {
4518       cand = iv_cand (data, i);
4519
4520       if (iv_ca_cand_used_p (ivs, cand))
4521         continue;
4522
4523       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4524       if (!act_delta)
4525         continue;
4526
4527       /* If we successfully added the candidate and the set is small enough,
4528          try optimizing it by removing other candidates.  */
4529       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4530         {
4531           iv_ca_delta_commit (data, ivs, act_delta, true);
4532           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4533           iv_ca_delta_commit (data, ivs, act_delta, false);
4534           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4535         }
4536
4537       if (acost < best_cost)
4538         {
4539           best_cost = acost;
4540           iv_ca_delta_free (&best_delta);
4541           best_delta = act_delta;
4542         }
4543       else
4544         iv_ca_delta_free (&act_delta);
4545     }
4546
4547   if (!best_delta)
4548     {
4549       /* Try removing the candidates from the set instead.  */
4550       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4551
4552       /* Nothing more we can do.  */
4553       if (!best_delta)
4554         return false;
4555     }
4556
4557   iv_ca_delta_commit (data, ivs, best_delta, true);
4558   gcc_assert (best_cost == iv_ca_cost (ivs));
4559   iv_ca_delta_free (&best_delta);
4560   return true;
4561 }
4562
4563 /* Attempts to find the optimal set of induction variables.  We do simple
4564    greedy heuristic -- we try to replace at most one candidate in the selected
4565    solution and remove the unused ivs while this improves the cost.  */
4566
4567 static struct iv_ca *
4568 find_optimal_iv_set (struct ivopts_data *data)
4569 {
4570   unsigned i;
4571   struct iv_ca *set;
4572   struct iv_use *use;
4573
4574   /* Get the initial solution.  */
4575   set = get_initial_solution (data);
4576   if (!set)
4577     {
4578       if (dump_file && (dump_flags & TDF_DETAILS))
4579         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4580       return NULL;
4581     }
4582
4583   if (dump_file && (dump_flags & TDF_DETAILS))
4584     {
4585       fprintf (dump_file, "Initial set of candidates:\n");
4586       iv_ca_dump (data, dump_file, set);
4587     }
4588
4589   while (try_improve_iv_set (data, set))
4590     {
4591       if (dump_file && (dump_flags & TDF_DETAILS))
4592         {
4593           fprintf (dump_file, "Improved to:\n");
4594           iv_ca_dump (data, dump_file, set);
4595         }
4596     }
4597
4598   if (dump_file && (dump_flags & TDF_DETAILS))
4599     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4600
4601   for (i = 0; i < n_iv_uses (data); i++)
4602     {
4603       use = iv_use (data, i);
4604       use->selected = iv_ca_cand_for_use (set, use)->cand;
4605     }
4606
4607   return set;
4608 }
4609
4610 /* Creates a new induction variable corresponding to CAND.  */
4611
4612 static void
4613 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4614 {
4615   block_stmt_iterator incr_pos;
4616   tree base;
4617   bool after = false;
4618
4619   if (!cand->iv)
4620     return;
4621
4622   switch (cand->pos)
4623     {
4624     case IP_NORMAL:
4625       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4626       break;
4627
4628     case IP_END:
4629       incr_pos = bsi_last (ip_end_pos (data->current_loop));
4630       after = true;
4631       break;
4632
4633     case IP_ORIGINAL:
4634       /* Mark that the iv is preserved.  */
4635       name_info (data, cand->var_before)->preserve_biv = true;
4636       name_info (data, cand->var_after)->preserve_biv = true;
4637
4638       /* Rewrite the increment so that it uses var_before directly.  */
4639       find_interesting_uses_op (data, cand->var_after)->selected = cand;
4640
4641       return;
4642     }
4643
4644   gimple_add_tmp_var (cand->var_before);
4645   add_referenced_tmp_var (cand->var_before);
4646
4647   base = unshare_expr (cand->iv->base);
4648
4649   create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
4650              &incr_pos, after, &cand->var_before, &cand->var_after);
4651 }
4652
4653 /* Creates new induction variables described in SET.  */
4654
4655 static void
4656 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4657 {
4658   unsigned i;
4659   struct iv_cand *cand;
4660   bitmap_iterator bi;
4661
4662   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4663     {
4664       cand = iv_cand (data, i);
4665       create_new_iv (data, cand);
4666     }
4667 }
4668
4669 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
4670    is true, remove also the ssa name defined by the statement.  */
4671
4672 static void
4673 remove_statement (tree stmt, bool including_defined_name)
4674 {
4675   if (TREE_CODE (stmt) == PHI_NODE)
4676     {
4677       if (!including_defined_name)
4678         {
4679           /* Prevent the ssa name defined by the statement from being removed.  */
4680           SET_PHI_RESULT (stmt, NULL);
4681         }
4682       remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
4683     }
4684   else
4685     {
4686       block_stmt_iterator bsi = bsi_for_stmt (stmt);
4687
4688       bsi_remove (&bsi);
4689     }
4690 }
4691
4692 /* Rewrites USE (definition of iv used in a nonlinear expression)
4693    using candidate CAND.  */
4694
4695 static void
4696 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4697                             struct iv_use *use, struct iv_cand *cand)
4698 {
4699   tree comp;
4700   tree op, stmts, tgt, ass;
4701   block_stmt_iterator bsi, pbsi;
4702
4703   /* An important special case -- if we are asked to express value of
4704      the original iv by itself, just exit; there is no need to
4705      introduce a new computation (that might also need casting the
4706      variable to unsigned and back).  */
4707   if (cand->pos == IP_ORIGINAL
4708       && TREE_CODE (use->stmt) == MODIFY_EXPR
4709       && TREE_OPERAND (use->stmt, 0) == cand->var_after)
4710     {
4711       op = TREE_OPERAND (use->stmt, 1);
4712
4713       /* Be a bit careful.  In case variable is expressed in some
4714          complicated way, rewrite it so that we may get rid of this
4715          complicated expression.  */
4716       if ((TREE_CODE (op) == PLUS_EXPR
4717            || TREE_CODE (op) == MINUS_EXPR)
4718           && TREE_OPERAND (op, 0) == cand->var_before
4719           && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
4720         return;
4721     }
4722
4723   comp = unshare_expr (get_computation (data->current_loop,
4724                                         use, cand));
4725   switch (TREE_CODE (use->stmt))
4726     {
4727     case PHI_NODE:
4728       tgt = PHI_RESULT (use->stmt);
4729
4730       /* If we should keep the biv, do not replace it.  */
4731       if (name_info (data, tgt)->preserve_biv)
4732         return;
4733
4734       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4735       while (!bsi_end_p (pbsi)
4736              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4737         {
4738           bsi = pbsi;
4739           bsi_next (&pbsi);
4740         }
4741       break;
4742
4743     case MODIFY_EXPR:
4744       tgt = TREE_OPERAND (use->stmt, 0);
4745       bsi = bsi_for_stmt (use->stmt);
4746       break;
4747
4748     default:
4749       gcc_unreachable ();
4750     }
4751
4752   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4753
4754   if (TREE_CODE (use->stmt) == PHI_NODE)
4755     {
4756       if (stmts)
4757         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4758       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
4759       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4760       remove_statement (use->stmt, false);
4761       SSA_NAME_DEF_STMT (tgt) = ass;
4762     }
4763   else
4764     {
4765       if (stmts)
4766         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4767       TREE_OPERAND (use->stmt, 1) = op;
4768     }
4769 }
4770
4771 /* Replaces ssa name in index IDX by its basic variable.  Callback for
4772    for_each_index.  */
4773
4774 static bool
4775 idx_remove_ssa_names (tree base, tree *idx,
4776                       void *data ATTRIBUTE_UNUSED)
4777 {
4778   tree *op;
4779
4780   if (TREE_CODE (*idx) == SSA_NAME)
4781     *idx = SSA_NAME_VAR (*idx);
4782
4783   if (TREE_CODE (base) == ARRAY_REF)
4784     {
4785       op = &TREE_OPERAND (base, 2);
4786       if (*op
4787           && TREE_CODE (*op) == SSA_NAME)
4788         *op = SSA_NAME_VAR (*op);
4789       op = &TREE_OPERAND (base, 3);
4790       if (*op
4791           && TREE_CODE (*op) == SSA_NAME)
4792         *op = SSA_NAME_VAR (*op);
4793     }
4794
4795   return true;
4796 }
4797
4798 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
4799
4800 static tree
4801 unshare_and_remove_ssa_names (tree ref)
4802 {
4803   ref = unshare_expr (ref);
4804   for_each_index (&ref, idx_remove_ssa_names, NULL);
4805
4806   return ref;
4807 }
4808
4809 /* Rewrites base of memory access OP with expression WITH in statement
4810    pointed to by BSI.  */
4811
4812 static void
4813 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
4814 {
4815   tree bvar, var, new_var, new_name, copy, name;
4816   tree orig;
4817
4818   var = bvar = get_base_address (*op);
4819
4820   if (!var || TREE_CODE (with) != SSA_NAME)
4821     goto do_rewrite;
4822
4823   gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
4824   gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
4825   if (TREE_CODE (var) == INDIRECT_REF)
4826     var = TREE_OPERAND (var, 0);
4827   if (TREE_CODE (var) == SSA_NAME)
4828     {
4829       name = var;
4830       var = SSA_NAME_VAR (var);
4831     }
4832   else if (DECL_P (var))
4833     name = NULL_TREE;
4834   else
4835     goto do_rewrite;
4836
4837   if (var_ann (var)->type_mem_tag)
4838     var = var_ann (var)->type_mem_tag;
4839
4840   /* We need to add a memory tag for the variable.  But we do not want
4841      to add it to the temporary used for the computations, since this leads
4842      to problems in redundancy elimination when there are common parts
4843      in two computations referring to the different arrays.  So we copy
4844      the variable to a new temporary.  */
4845   copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
4846   if (name)
4847     new_name = duplicate_ssa_name (name, copy);
4848   else
4849     {
4850       new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
4851       add_referenced_tmp_var (new_var);
4852       var_ann (new_var)->type_mem_tag = var;
4853       new_name = make_ssa_name (new_var, copy);
4854     }
4855   TREE_OPERAND (copy, 0) = new_name;
4856   bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4857   with = new_name;
4858
4859 do_rewrite:
4860
4861   orig = NULL_TREE;
4862   gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4863   gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
4864
4865   if (TREE_CODE (*op) == INDIRECT_REF)
4866     orig = REF_ORIGINAL (*op);
4867   if (!orig)
4868     orig = unshare_and_remove_ssa_names (*op);
4869
4870   *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
4871
4872   /* Record the original reference, for purposes of alias analysis.  */
4873   REF_ORIGINAL (*op) = orig;
4874 }
4875
4876 /* Rewrites USE (address that is an iv) using candidate CAND.  */
4877
4878 static void
4879 rewrite_use_address (struct ivopts_data *data,
4880                      struct iv_use *use, struct iv_cand *cand)
4881 {
4882   tree comp = unshare_expr (get_computation (data->current_loop,
4883                                              use, cand));
4884   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4885   tree stmts;
4886   tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4887
4888   if (stmts)
4889     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4890
4891   rewrite_address_base (&bsi, use->op_p, op);
4892 }
4893
4894 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4895    candidate CAND.  */
4896
4897 static void
4898 rewrite_use_compare (struct ivopts_data *data,
4899                      struct iv_use *use, struct iv_cand *cand)
4900 {
4901   tree comp;
4902   tree *op_p, cond, op, stmts, bound;
4903   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4904   enum tree_code compare;
4905
4906   if (may_eliminate_iv (data, use, cand, &compare, &bound))
4907     {
4908       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
4909       tree var_type = TREE_TYPE (var);
4910
4911       bound = fold_convert (var_type, bound);
4912       op = force_gimple_operand (unshare_expr (bound), &stmts,
4913                                  true, NULL_TREE);
4914
4915       if (stmts)
4916         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4917
4918       *use->op_p = build2 (compare, boolean_type_node, var, op);
4919       modify_stmt (use->stmt);
4920       return;
4921     }
4922
4923   /* The induction variable elimination failed; just express the original
4924      giv.  */
4925   comp = unshare_expr (get_computation (data->current_loop, use, cand));
4926
4927   cond = *use->op_p;
4928   op_p = &TREE_OPERAND (cond, 0);
4929   if (TREE_CODE (*op_p) != SSA_NAME
4930       || zero_p (get_iv (data, *op_p)->step))
4931     op_p = &TREE_OPERAND (cond, 1);
4932
4933   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4934   if (stmts)
4935     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4936
4937   *op_p = op;
4938 }
4939
4940 /* Ensure that operand *OP_P may be used at the end of EXIT without
4941    violating loop closed ssa form.  */
4942
4943 static void
4944 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4945 {
4946   basic_block def_bb;
4947   struct loop *def_loop;
4948   tree phi, use;
4949
4950   use = USE_FROM_PTR (op_p);
4951   if (TREE_CODE (use) != SSA_NAME)
4952     return;
4953
4954   def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4955   if (!def_bb)
4956     return;
4957
4958   def_loop = def_bb->loop_father;
4959   if (flow_bb_inside_loop_p (def_loop, exit->dest))
4960     return;
4961
4962   /* Try finding a phi node that copies the value out of the loop.  */
4963   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4964     if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4965       break;
4966
4967   if (!phi)
4968     {
4969       /* Create such a phi node.  */
4970       tree new_name = duplicate_ssa_name (use, NULL);
4971
4972       phi = create_phi_node (new_name, exit->dest);
4973       SSA_NAME_DEF_STMT (new_name) = phi;
4974       add_phi_arg (phi, use, exit);
4975     }
4976
4977   SET_USE (op_p, PHI_RESULT (phi));
4978 }
4979
4980 /* Ensure that operands of STMT may be used at the end of EXIT without
4981    violating loop closed ssa form.  */
4982
4983 static void
4984 protect_loop_closed_ssa_form (edge exit, tree stmt)
4985 {
4986   use_optype uses;
4987   vuse_optype vuses;
4988   v_may_def_optype v_may_defs;
4989   unsigned i;
4990
4991   get_stmt_operands (stmt);
4992
4993   uses = STMT_USE_OPS (stmt);
4994   for (i = 0; i < NUM_USES (uses); i++)
4995     protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4996
4997   vuses = STMT_VUSE_OPS (stmt);
4998   for (i = 0; i < NUM_VUSES (vuses); i++)
4999     protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
5000
5001   v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5002   for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
5003     protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
5004 }
5005
5006 /* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange things
5007    so that they are emitted on the correct place, and so that the loop closed
5008    ssa form is preserved.  */
5009
5010 static void
5011 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5012 {
5013   tree_stmt_iterator tsi;
5014   block_stmt_iterator bsi;
5015   tree phi, stmt, def, next;
5016
5017   if (EDGE_COUNT (exit->dest->preds) > 1)
5018     split_loop_exit_edge (exit);
5019
5020   if (TREE_CODE (stmts) == STATEMENT_LIST)
5021     {
5022       for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5023         protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
5024     }
5025   else
5026     protect_loop_closed_ssa_form (exit, stmts);
5027
5028   /* Ensure there is label in exit->dest, so that we can
5029      insert after it.  */
5030   tree_block_label (exit->dest);
5031   bsi = bsi_after_labels (exit->dest);
5032   bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5033
5034   if (!op)
5035     return;
5036
5037   for (phi = phi_nodes (exit->dest); phi; phi = next)
5038     {
5039       next = PHI_CHAIN (phi);
5040
5041       if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5042         {
5043           def = PHI_RESULT (phi);
5044           remove_statement (phi, false);
5045           stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5046                         def, op);
5047           SSA_NAME_DEF_STMT (def) = stmt;
5048           bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5049         }
5050     }
5051 }
5052
5053 /* Rewrites the final value of USE (that is only needed outside of the loop)
5054    using candidate CAND.  */
5055
5056 static void
5057 rewrite_use_outer (struct ivopts_data *data,
5058                    struct iv_use *use, struct iv_cand *cand)
5059 {
5060   edge exit;
5061   tree value, op, stmts, tgt;
5062   tree phi;
5063
5064   switch (TREE_CODE (use->stmt))
5065     {
5066     case PHI_NODE:
5067       tgt = PHI_RESULT (use->stmt);
5068       break;
5069     case MODIFY_EXPR:
5070       tgt = TREE_OPERAND (use->stmt, 0);
5071       break;
5072     default:
5073       gcc_unreachable ();
5074     }
5075
5076   exit = single_dom_exit (data->current_loop);
5077
5078   if (exit && !(exit->flags & EDGE_COMPLEX))
5079     {
5080       if (!cand->iv)
5081         {
5082           bool ok = may_replace_final_value (data, use, &value);
5083           gcc_assert (ok);
5084         }
5085       else
5086         value = get_computation_at (data->current_loop,
5087                                     use, cand, last_stmt (exit->src));
5088
5089       value = unshare_expr (value);
5090       op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5091
5092       /* If we will preserve the iv anyway and we would need to perform
5093          some computation to replace the final value, do nothing.  */
5094       if (stmts && name_info (data, tgt)->preserve_biv)
5095         return;
5096
5097       for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5098         {
5099           use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5100
5101           if (USE_FROM_PTR (use_p) == tgt)
5102             SET_USE (use_p, op);
5103         }
5104
5105       if (stmts)
5106         compute_phi_arg_on_exit (exit, stmts, op);
5107
5108       /* Enable removal of the statement.  We cannot remove it directly,
5109          since we may still need the aliasing information attached to the
5110          ssa name defined by it.  */
5111       name_info (data, tgt)->iv->have_use_for = false;
5112       return;
5113     }
5114
5115   /* If the variable is going to be preserved anyway, there is nothing to
5116      do.  */
5117   if (name_info (data, tgt)->preserve_biv)
5118     return;
5119
5120   /* Otherwise we just need to compute the iv.  */
5121   rewrite_use_nonlinear_expr (data, use, cand);
5122 }
5123
5124 /* Rewrites USE using candidate CAND.  */
5125
5126 static void
5127 rewrite_use (struct ivopts_data *data,
5128              struct iv_use *use, struct iv_cand *cand)
5129 {
5130   switch (use->type)
5131     {
5132       case USE_NONLINEAR_EXPR:
5133         rewrite_use_nonlinear_expr (data, use, cand);
5134         break;
5135
5136       case USE_OUTER:
5137         rewrite_use_outer (data, use, cand);
5138         break;
5139
5140       case USE_ADDRESS:
5141         rewrite_use_address (data, use, cand);
5142         break;
5143
5144       case USE_COMPARE:
5145         rewrite_use_compare (data, use, cand);
5146         break;
5147
5148       default:
5149         gcc_unreachable ();
5150     }
5151   modify_stmt (use->stmt);
5152 }
5153
5154 /* Rewrite the uses using the selected induction variables.  */
5155
5156 static void
5157 rewrite_uses (struct ivopts_data *data)
5158 {
5159   unsigned i;
5160   struct iv_cand *cand;
5161   struct iv_use *use;
5162
5163   for (i = 0; i < n_iv_uses (data); i++)
5164     {
5165       use = iv_use (data, i);
5166       cand = use->selected;
5167       gcc_assert (cand);
5168
5169       rewrite_use (data, use, cand);
5170     }
5171 }
5172
5173 /* Removes the ivs that are not used after rewriting.  */
5174
5175 static void
5176 remove_unused_ivs (struct ivopts_data *data)
5177 {
5178   unsigned j;
5179   bitmap_iterator bi;
5180
5181   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5182     {
5183       struct version_info *info;
5184
5185       info = ver_info (data, j);
5186       if (info->iv
5187           && !zero_p (info->iv->step)
5188           && !info->inv_id
5189           && !info->iv->have_use_for
5190           && !info->preserve_biv)
5191         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5192     }
5193 }
5194
5195 /* Frees data allocated by the optimization of a single loop.  */
5196
5197 static void
5198 free_loop_data (struct ivopts_data *data)
5199 {
5200   unsigned i, j;
5201   bitmap_iterator bi;
5202
5203   htab_empty (data->niters);
5204
5205   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5206     {
5207       struct version_info *info;
5208
5209       info = ver_info (data, i);
5210       if (info->iv)
5211         free (info->iv);
5212       info->iv = NULL;
5213       info->has_nonlin_use = false;
5214       info->preserve_biv = false;
5215       info->inv_id = 0;
5216     }
5217   bitmap_clear (data->relevant);
5218   bitmap_clear (data->important_candidates);
5219
5220   for (i = 0; i < n_iv_uses (data); i++)
5221     {
5222       struct iv_use *use = iv_use (data, i);
5223
5224       free (use->iv);
5225       BITMAP_FREE (use->related_cands);
5226       for (j = 0; j < use->n_map_members; j++)
5227         if (use->cost_map[j].depends_on)
5228           BITMAP_FREE (use->cost_map[j].depends_on);
5229       free (use->cost_map);
5230       free (use);
5231     }
5232   VARRAY_POP_ALL (data->iv_uses);
5233
5234   for (i = 0; i < n_iv_cands (data); i++)
5235     {
5236       struct iv_cand *cand = iv_cand (data, i);
5237
5238       if (cand->iv)
5239         free (cand->iv);
5240       free (cand);
5241     }
5242   VARRAY_POP_ALL (data->iv_candidates);
5243
5244   if (data->version_info_size < num_ssa_names)
5245     {
5246       data->version_info_size = 2 * num_ssa_names;
5247       free (data->version_info);
5248       data->version_info = xcalloc (data->version_info_size,
5249                                     sizeof (struct version_info));
5250     }
5251
5252   data->max_inv_id = 0;
5253
5254   for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
5255     {
5256       tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
5257
5258       SET_DECL_RTL (obj, NULL_RTX);
5259     }
5260   VARRAY_POP_ALL (decl_rtl_to_reset);
5261 }
5262
5263 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5264    loop tree.  */
5265
5266 static void
5267 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5268 {
5269   unsigned i;
5270
5271   for (i = 1; i < loops->num; i++)
5272     if (loops->parray[i])
5273       {
5274         free (loops->parray[i]->aux);
5275         loops->parray[i]->aux = NULL;
5276       }
5277
5278   free_loop_data (data);
5279   free (data->version_info);
5280   BITMAP_FREE (data->relevant);
5281   BITMAP_FREE (data->important_candidates);
5282   htab_delete (data->niters);
5283
5284   VARRAY_FREE (decl_rtl_to_reset);
5285   VARRAY_FREE (data->iv_uses);
5286   VARRAY_FREE (data->iv_candidates);
5287 }
5288
5289 /* Optimizes the LOOP.  Returns true if anything changed.  */
5290
5291 static bool
5292 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5293 {
5294   bool changed = false;
5295   struct iv_ca *iv_ca;
5296   edge exit;
5297
5298   data->current_loop = loop;
5299
5300   if (dump_file && (dump_flags & TDF_DETAILS))
5301     {
5302       fprintf (dump_file, "Processing loop %d\n", loop->num);
5303
5304       exit = single_dom_exit (loop);
5305       if (exit)
5306         {
5307           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5308                    exit->src->index, exit->dest->index);
5309           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5310           fprintf (dump_file, "\n");
5311         }
5312
5313       fprintf (dump_file, "\n");
5314     }
5315
5316   /* For each ssa name determines whether it behaves as an induction variable
5317      in some loop.  */
5318   if (!find_induction_variables (data))
5319     goto finish;
5320
5321   /* Finds interesting uses (item 1).  */
5322   find_interesting_uses (data);
5323   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5324     goto finish;
5325
5326   /* Finds candidates for the induction variables (item 2).  */
5327   find_iv_candidates (data);
5328
5329   /* Calculates the costs (item 3, part 1).  */
5330   determine_use_iv_costs (data);
5331   determine_iv_costs (data);
5332   determine_set_costs (data);
5333
5334   /* Find the optimal set of induction variables (item 3, part 2).  */
5335   iv_ca = find_optimal_iv_set (data);
5336   if (!iv_ca)
5337     goto finish;
5338   changed = true;
5339
5340   /* Create the new induction variables (item 4, part 1).  */
5341   create_new_ivs (data, iv_ca);
5342   iv_ca_free (&iv_ca);
5343
5344   /* Rewrite the uses (item 4, part 2).  */
5345   rewrite_uses (data);
5346
5347   /* Remove the ivs that are unused after rewriting.  */
5348   remove_unused_ivs (data);
5349
5350   /* We have changed the structure of induction variables; it might happen
5351      that definitions in the scev database refer to some of them that were
5352      eliminated.  */
5353   scev_reset ();
5354
5355 finish:
5356   free_loop_data (data);
5357
5358   return changed;
5359 }
5360
5361 /* Main entry point.  Optimizes induction variables in LOOPS.  */
5362
5363 void
5364 tree_ssa_iv_optimize (struct loops *loops)
5365 {
5366   struct loop *loop;
5367   struct ivopts_data data;
5368
5369   tree_ssa_iv_optimize_init (loops, &data);
5370
5371   /* Optimize the loops starting with the innermost ones.  */
5372   loop = loops->tree_root;
5373   while (loop->inner)
5374     loop = loop->inner;
5375
5376 #ifdef ENABLE_CHECKING
5377   verify_loop_closed_ssa ();
5378   verify_stmts ();
5379 #endif
5380
5381   /* Scan the loops, inner ones first.  */
5382   while (loop != loops->tree_root)
5383     {
5384       if (dump_file && (dump_flags & TDF_DETAILS))
5385         flow_loop_dump (loop, dump_file, NULL, 1);
5386
5387       tree_ssa_iv_optimize_loop (&data, loop);
5388
5389       if (loop->next)
5390         {
5391           loop = loop->next;
5392           while (loop->inner)
5393             loop = loop->inner;
5394         }
5395       else
5396         loop = loop->outer;
5397     }
5398
5399 #ifdef ENABLE_CHECKING
5400   verify_loop_closed_ssa ();
5401   verify_stmts ();
5402 #endif
5403
5404   tree_ssa_iv_optimize_finalize (loops, &data);
5405 }