/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The cost
	 reflects the time spent on modifying the induction variables value to
	 be usable for the given purpose (adding base and offset for arrays,
	 etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
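/* As a small illustration (an editor's example, not a transcript of the
   pass), in a loop like

     for (i = 0; i < 100; i++)
       a[i] = 0;

   the address a + 4 * i (for 4-byte elements) and the exit test i < 100
   are the interesting uses; i itself is one candidate, and a pointer
   walked from a to a + 400 in steps of 4 is another.  The pass picks
   whichever set of such variables makes the uses cheapest overall.  */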
#include "coretypes.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "langhooks.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  unsigned inv_id;	/* Id of an invariant.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
};
/* Information attached to loop.  */
struct loop_data
{
  unsigned regs_used;	/* Number of registers used.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_OUTER,		/* The induction variable is used outside the loop.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  unsigned cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  tree stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  enum iv_position pos;	/* Where it is computed.  */
  tree incremented_at;	/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The data for LOOP.  */

static inline struct loop_data *
loop_data (struct loop *loop)
{
  return loop->aux;
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

static edge
single_dom_exit (struct loop *loop)
{
  edge exit = loop->single_exit;

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_OUTER:
      fprintf (file, "  outside\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Checks whether there exists number X such that X * B = A, counting modulo
   2^BITS.  */

static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
	HOST_WIDE_INT *x)
{
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  unsigned HOST_WIDE_INT inv, ex, val;
  unsigned i;

  a &= mask;
  b &= mask;

  /* First divide the whole equation by 2 as long as possible.  */
  while (!(a & 1) && !(b & 1))
    {
      a >>= 1;
      b >>= 1;
    }

  if (!(b & 1))
    {
      /* If b is still even, a is odd and there is no such x.  */
      return false;
    }

  /* Find the inverse of b.  We compute it as
     b^(2^(bits - 1) - 1) (mod 2^bits).  */
  inv = 1;
  ex = b;
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;
      ex = (ex * ex) & mask;
    }

  val = (a * inv) & mask;

  gcc_assert (((val * b) & mask) == a);

  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  *x = val;

  return true;
}
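/* A worked example: with BITS = 4 (arithmetic modulo 16), A = 6 and
   B = 10, halving both once gives a = 3 and b = 5.  The inverse of 5 is
   5^7 = 13 (mod 16), so X = 3 * 13 = 7 (mod 16), and indeed
   7 * 10 = 70 = 6 (mod 16).  With A = 6 and B = 4, b stays even after
   the halvings while a becomes odd, so no such X exists.  */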
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
	return false;
      if (bsi_stmt (bsi) == stmt)
	return true;
    }
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}
/* Element of the table in that we cache the numbers of iterations obtained
   from exits of the loop.  */

struct nfe_cache_elt
{
  /* The edge for that the number of iterations is cached.  */
  edge exit;

  /* True if the # of iterations was successfully determined.  */
  bool valid_p;

  /* Description of # of iterations.  */
  struct tree_niter_desc niter;
};
/* Hash function for nfe_cache_elt E.  */

static hashval_t
nfe_hash (const void *e)
{
  const struct nfe_cache_elt *elt = e;

  return htab_hash_pointer (elt->exit);
}

/* Equality function for nfe_cache_elt E1 and edge E2.  */

static int
nfe_eq (const void *e1, const void *e2)
{
  const struct nfe_cache_elt *elt1 = e1;

  return elt1->exit == e2;
}
/* Returns structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct nfe_cache_elt *nfe_desc;
  void **slot;

  slot = htab_find_slot_with_hash (data->niters, exit,
				   htab_hash_pointer (exit),
				   INSERT);

  if (!*slot)
    {
      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
      nfe_desc->exit = exit;
      nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
						     exit, &nfe_desc->niter,
						     true);
      *slot = nfe_desc;
    }
  else
    nfe_desc = *slot;

  if (!nfe_desc->valid_p)
    return NULL;

  return &nfe_desc->niter;
}
/* Returns structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  LOOPS is the loop tree.  */

static void
tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
{
  unsigned i;

  data->version_info_size = 2 * num_ssa_names;
  data->version_info = xcalloc (data->version_info_size,
				sizeof (struct version_info));
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = htab_create (10, nfe_hash, nfe_eq, free);

  for (i = 1; i < loops->num; i++)
    if (loops->parray[i])
      loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));

  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to that EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
	return op0;

      if (!op0)
	return (code == PLUS_EXPR
		? op1
		: fold_build1 (NEGATE_EXPR, ptr_type_node, op1));

      return fold_build2 (code, ptr_type_node, op0, op1);

    case NOP_EXPR:
    case CONVERT_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
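/* For example, for the address &a[i] + 4 this returns &a (folded to a
   void * constant), while for a plain integer constant, or a pointer
   whose base cannot be traced, it returns NULL_TREE.  */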
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = xcalloc (1, sizeof (struct iv));

  if (step && integer_zerop (step))
    step = NULL_TREE;

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, NULL_TREE);
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi), base, step;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &base, &step, true))
    return NULL_TREE;

  if (zero_p (step))
    return NULL_TREE;

  return step;
}
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

static bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class class;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  class = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  switch (class)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      break;
    }

  return false;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  unsigned i;
  VEC (tree, heap) *loop_phis;

  /* For allowing scev to remove some loop phi nodes in
     unify_peeled_chrec, we have to compute the scev information
     before assuming that each phi node is an induction variable.  */
  loop_phis = VEC_alloc (tree, heap, 2);
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      VEC_safe_push (tree, heap, loop_phis, phi);
    }
  for (i = 0; VEC_iterate(tree, loop_phis, i, phi); i++)
    determine_biv_step (phi);
  VEC_free (tree, heap, loop_phis);

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to BASE and STEP.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
			tree *base, tree *step)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  *base = NULL_TREE;
  *step = NULL_TREE;

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  lhs = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
    return false;

  *base = expand_simple_operations (*base);
  if (contains_abnormal_ssa_name_p (*base)
      || contains_abnormal_ssa_name_p (*step))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
{
  tree base, step;

  if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
    return;

  set_iv (data, TREE_OPERAND (stmt, 0), base, step);
}
/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}
/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter;

      niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  fprintf (dump_file, "\n");

	  fprintf (dump_file, "  may be zero if ");
	  print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    tree stmt, enum use_type use_type)
{
  struct iv_use *use = xcalloc (1, sizeof (struct iv_use));

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it
   as TYPE.  */

static struct iv_use *
find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
				       enum use_type type)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR
		  || use->type == USE_OUTER);

      if (type == USE_NONLINEAR_EXPR)
	use->type = USE_NONLINEAR_EXPR;
      return use;
    }

  if (zero_p (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = xmalloc (sizeof (struct iv));
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
	      || TREE_CODE (stmt) == MODIFY_EXPR);

  use = record_use (data, NULL, civ, stmt, type);
  iv->use_id = use->id;

  return use;
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
}

/* Records a definition of induction variable OP that is used outside of the
   loop.  */

static struct iv_use *
find_interesting_uses_outer (struct ivopts_data *data, tree op)
{
  return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
}
/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *op0_p;
  tree *op1_p;
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
  struct iv const_iv;
  tree zero = integer_zero_node;

  const_iv.step = NULL_TREE;

  if (TREE_CODE (*cond_p) != SSA_NAME
      && !COMPARISON_CLASS_P (*cond_p))
    return;

  if (TREE_CODE (*cond_p) == SSA_NAME)
    {
      op0_p = cond_p;
      op1_p = &zero;
    }
  else
    {
      op0_p = &TREE_OPERAND (*cond_p, 0);
      op1_p = &TREE_OPERAND (*cond_p, 1);
    }

  if (TREE_CODE (*op0_p) == SSA_NAME)
    iv0 = get_iv (data, *op0_p);
  else
    iv0 = &const_iv;

  if (TREE_CODE (*op1_p) == SSA_NAME)
    iv1 = get_iv (data, *op1_p);
  else
    iv1 = &const_iv;

  if (/* When comparing with non-invariant value, we may not do any senseful
	 induction variable elimination.  */
      (!iv0 || !iv1)
      /* Eliminating condition based on two ivs would be nontrivial.
	 ??? TODO -- it is not really important to handle this case.  */
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
    {
      find_interesting_uses_op (data, *op0_p);
      find_interesting_uses_op (data, *op1_p);
      return;
    }

  if (zero_p (iv0->step) && zero_p (iv1->step))
    {
      /* If both are invariants, this is a work for unswitching.  */
      return;
    }

  civ = xmalloc (sizeof (struct iv));
  *civ = zero_p (iv0->step) ? *iv1 : *iv0;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}
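/* E.g. in "for (i = 0; i < n; i++)" with n loop-invariant, the exit
   condition i < n compares an iv with an invariant, so it is recorded
   as a USE_COMPARE use based on i; this is what later permits rewriting
   the test in terms of another iv or eliminating i altogether.  */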
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_CODE_LENGTH (TREE_CODE (expr));
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  tree stmt;
  tree *step_p;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  *idx = iv->base;

  if (!iv->step)
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  /* FIXME: convert_step should not be used outside chrec_convert: fix
     this by calling chrec_convert.  */
  iv_step = convert_step (dta->ivopts_data->current_loop,
			  sizetype, iv->base, iv->step, dta->stmt);

  if (!iv_step)
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);

  if (!*dta->step_p)
    *dta->step_p = step;
  else
    *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *data)
{
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* Returns true if memory reference REF may be unaligned.  */

static bool
may_be_unaligned_p (tree ref)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode
      && (base_align < GET_MODE_ALIGNMENT (mode)
	  || bitpos % GET_MODE_ALIGNMENT (mode) != 0
	  || bitpos % BITS_PER_UNIT != 0))
    return true;

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = NULL;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == COMPONENT_REF
      && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      if (step)
		step = fold_build2 (PLUS_EXPR, type, step, astep);
	      else
		step = astep;
	    }
	}

      if (zero_p (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step_p = &step;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || zero_p (step))
	goto fail;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      base = build_fold_addr_expr (base);
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      lhs = TREE_OPERAND (stmt, 0);
      rhs = TREE_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, lhs);

	  if (iv && !zero_p (iv->step))
	    return;
	}

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
	{
	case tcc_comparison:
	  find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
	  return;

	case tcc_reference:
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
	  if (REFERENCE_CLASS_P (lhs))
	    find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  return;

	default: ;
	}

      if (REFERENCE_CLASS_P (lhs)
	  && is_gimple_val (rhs))
	{
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  find_interesting_uses_op (data, rhs);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !zero_p (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  tree phi, def;

  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      find_interesting_uses_outer (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
	find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || zero_p (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst_type (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (zero_p (op1))
	expr = op0;
      else if (zero_p (op0))
	{
	  if (code == PLUS_EXPR)
	    expr = op1;
	  else
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
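/* E.g. for &a[i + 3] with 4-byte array elements this returns &a[i]
   and stores 12 to *OFFSET.  */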
/* Returns variant of TYPE that can be used as base for different uses.
   For integer types, we return unsigned variant of the type, which
   avoids problems with overflows.  For pointer types, we return void *.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return ptr_type_node;

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
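/* E.g. generic_type_for (int) is unsigned int, and generic_type_for of
   any pointer type is void *.  */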
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to that we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  if (step)
	    step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at)
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (!operand_equal_p (base, cand->iv->base, 0))
	continue;

      if (zero_p (cand->iv->step))
	{
	  if (zero_p (step))
	    break;
	}
      else
	{
	  if (step && operand_equal_p (step, cand->iv->step, 0))
	    break;
	}
    }

  if (i == n_iv_cands (data))
    {
      cand = xcalloc (1, sizeof (struct iv_cand));
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
}
/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
				     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
		 true, NULL);
}
/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  tree phi, def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
		 build_int_cst (TREE_TYPE (iv->base), 0),
		 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (TREE_CODE (phi) == PHI_NODE)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
2197 add_old_ivs_candidates (struct ivopts_data
*data
)
2203 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2205 iv
= ver_info (data
, i
)->iv
;
2206 if (iv
&& iv
->biv_p
&& !zero_p (iv
->step
))
2207 add_old_iv_candidates (data
, iv
);
2211 /* Adds candidates based on the value of the induction variable IV and USE. */
2214 add_iv_value_candidates (struct ivopts_data
*data
,
2215 struct iv
*iv
, struct iv_use
*use
)
2217 unsigned HOST_WIDE_INT offset
;
2220 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2222 /* The same, but with initial value zero. Make such variable important,
2223 since it is generic enough so that possibly many uses may be based
2225 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2226 iv
->step
, true, use
);
2228 /* Third, try removing the constant offset. */
2229 base
= strip_offset (iv
->base
, &offset
);
2231 add_candidate (data
, base
, iv
->step
, false, use
);
2234 /* Possibly adds pseudocandidate for replacing the final value of USE by
2235 a direct computation. */
2238 add_iv_outer_candidates (struct ivopts_data
*data
, struct iv_use
*use
)
2240 struct tree_niter_desc
*niter
;
2242 /* We must know where we exit the loop and how many times does it roll. */
2243 niter
= niter_for_single_dom_exit (data
);
2245 || !zero_p (niter
->may_be_zero
))
2248 add_candidate_1 (data
, NULL
, NULL
, false, IP_NORMAL
, use
, NULL_TREE
);
2251 /* Adds candidates based on the uses. */
2254 add_derived_ivs_candidates (struct ivopts_data
*data
)
2258 for (i
= 0; i
< n_iv_uses (data
); i
++)
2260 struct iv_use
*use
= iv_use (data
, i
);
2267 case USE_NONLINEAR_EXPR
:
2270 /* Just add the ivs based on the value of the iv used here. */
2271 add_iv_value_candidates (data
, use
->iv
, use
);
2275 add_iv_value_candidates (data
, use
->iv
, use
);
2277 /* Additionally, add the pseudocandidate for the possibility to
2278 replace the final value by a direct computation. */
2279 add_iv_outer_candidates (data
, use
);
2288 /* Record important candidates and add them to related_cands bitmaps
2292 record_important_candidates (struct ivopts_data
*data
)
2297 for (i
= 0; i
< n_iv_cands (data
); i
++)
2299 struct iv_cand
*cand
= iv_cand (data
, i
);
2301 if (cand
->important
)
2302 bitmap_set_bit (data
->important_candidates
, i
);
2305 data
->consider_all_candidates
= (n_iv_cands (data
)
2306 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2308 if (data
->consider_all_candidates
)
2310 /* We will not need "related_cands" bitmaps in this case,
2311 so release them to decrease peak memory consumption. */
2312 for (i
= 0; i
< n_iv_uses (data
); i
++)
2314 use
= iv_use (data
, i
);
2315 BITMAP_FREE (use
->related_cands
);
2320 /* Add important candidates to the related_cands bitmaps. */
2321 for (i
= 0; i
< n_iv_uses (data
); i
++)
2322 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2323 data
->important_candidates
);
2327 /* Finds the candidates for the induction variables. */
2330 find_iv_candidates (struct ivopts_data
*data
)
2332 /* Add commonly used ivs. */
2333 add_standard_iv_candidates (data
);
2335 /* Add old induction variables. */
2336 add_old_ivs_candidates (data
);
2338 /* Add induction variables derived from uses. */
2339 add_derived_ivs_candidates (data
);
2341 /* Record the important candidates. */
2342 record_important_candidates (data
);
2345 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2346 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2347 we allocate a simple list to every use. */
2350 alloc_use_cost_map (struct ivopts_data
*data
)
2352 unsigned i
, size
, s
, j
;
2354 for (i
= 0; i
< n_iv_uses (data
); i
++)
2356 struct iv_use
*use
= iv_use (data
, i
);
2359 if (data
->consider_all_candidates
)
2360 size
= n_iv_cands (data
);
2364 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2369 /* Round up to the power of two, so that moduling by it is fast. */
2370 for (size
= 1; size
< s
; size
<<= 1)
2374 use
->n_map_members
= size
;
2375 use
->cost_map
= xcalloc (size
, sizeof (struct cost_pair
));
2379 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2380 on invariants DEPENDS_ON and that the value used in expressing it
2384 set_use_iv_cost (struct ivopts_data
*data
,
2385 struct iv_use
*use
, struct iv_cand
*cand
, unsigned cost
,
2386 bitmap depends_on
, tree value
)
2392 BITMAP_FREE (depends_on
);
2396 if (data
->consider_all_candidates
)
2398 use
->cost_map
[cand
->id
].cand
= cand
;
2399 use
->cost_map
[cand
->id
].cost
= cost
;
2400 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2401 use
->cost_map
[cand
->id
].value
= value
;
2405 /* n_map_members is a power of two, so this computes modulo. */
2406 s
= cand
->id
& (use
->n_map_members
- 1);
2407 for (i
= s
; i
< use
->n_map_members
; i
++)
2408 if (!use
->cost_map
[i
].cand
)
2410 for (i
= 0; i
< s
; i
++)
2411 if (!use
->cost_map
[i
].cand
)
2417 use
->cost_map
[i
].cand
= cand
;
2418 use
->cost_map
[i
].cost
= cost
;
2419 use
->cost_map
[i
].depends_on
= depends_on
;
2420 use
->cost_map
[i
].value
= value
;
/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
		 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
	return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}
/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
	cost += rtx_cost (set, SET);
      else
	cost++;
    }

  return cost;
}
/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  rtx x;

  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (Pmode, name);
    }
  else
    x = gen_raw_REG (Pmode, (*regno)++);

  return gen_rtx_MEM (DECL_MODE (obj), x);
}
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;

  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));

  return cost;
}
/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}
/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */

static int
tree_int_cst_sign_bit (tree t)
{
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
  unsigned HOST_WIDE_INT w;

  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = TREE_INT_CST_LOW (t);
  else
    {
      w = TREE_INT_CST_HIGH (t);
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (w >> bitno) & 1;
}
/* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
   return cst.  Otherwise return NULL_TREE.  */

static tree
constant_multiple_of (tree type, tree top, tree bot)
{
  tree res, mby, p0, p1;
  enum tree_code code;
  bool negate;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    return build_int_cst (type, 1);

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return NULL_TREE;

      res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
      if (!res)
        return NULL_TREE;

      return fold_binary_to_constant (MULT_EXPR, type, res,
                                      fold_convert (type, mby));

    case PLUS_EXPR:
    case MINUS_EXPR:
      p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
      if (!p0)
        return NULL_TREE;
      p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
      if (!p1)
        return NULL_TREE;

      return fold_binary_to_constant (code, type, p0, p1);

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return NULL_TREE;

      bot = fold_convert (type, bot);
      top = fold_convert (type, top);

      /* If BOT seems to be negative, try dividing by -BOT instead, and negate
         the result afterwards.  */
      if (tree_int_cst_sign_bit (bot))
        {
          negate = true;
          bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
        }
      else
        negate = false;

      /* Ditto for TOP.  */
      if (tree_int_cst_sign_bit (top))
        {
          negate = !negate;
          top = fold_unary_to_constant (NEGATE_EXPR, type, top);
        }

      if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
        return NULL_TREE;

      res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
      if (negate)
        res = fold_unary_to_constant (NEGATE_EXPR, type, res);
      return res;

    default:
      return NULL_TREE;
    }
}
/* Sets COMB to CST.  */

static void
aff_combination_const (struct affine_tree_combination *comb, tree type,
                       unsigned HOST_WIDE_INT cst)
{
  unsigned prec = TYPE_PRECISION (type);

  comb->type = type;
  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);

  comb->n = 0;
  comb->rest = NULL_TREE;
  comb->offset = cst & comb->mask;
}
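/* Note on the mask: for a PREC-bit type it is 2^PREC - 1, i.e. all value
   bits set.  It is computed as ((unsigned HOST_WIDE_INT) 2 << (PREC - 1)) - 1
   rather than as (1 << PREC) - 1 so that the shift amount stays below the
   host word width even when PREC equals HOST_BITS_PER_WIDE_INT.  */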
/* Sets COMB to single element ELT.  */

static void
aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
{
  unsigned prec = TYPE_PRECISION (type);

  comb->type = type;
  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);

  comb->n = 1;
  comb->elts[0] = elt;
  comb->coefs[0] = 1;
  comb->rest = NULL_TREE;
  comb->offset = 0;
}
/* Scales COMB by SCALE.  */

static void
aff_combination_scale (struct affine_tree_combination *comb,
                       unsigned HOST_WIDE_INT scale)
{
  unsigned i, j;

  scale &= comb->mask;
  if (scale == 1)
    return;

  if (scale == 0)
    {
      aff_combination_const (comb, comb->type, 0);
      return;
    }

  comb->offset = (scale * comb->offset) & comb->mask;
  for (i = 0, j = 0; i < comb->n; i++)
    {
      comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
      comb->elts[j] = comb->elts[i];
      if (comb->coefs[j] != 0)
        j++;
    }
  comb->n = j;

  if (comb->rest)
    {
      if (comb->n < MAX_AFF_ELTS)
        {
          comb->coefs[comb->n] = scale;
          comb->elts[comb->n] = comb->rest;
          comb->rest = NULL_TREE;
          comb->n++;
        }
      else
        comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
                                  build_int_cst_type (comb->type, scale));
    }
}
/* Adds ELT * SCALE to COMB.  */

static void
aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
                         unsigned HOST_WIDE_INT scale)
{
  unsigned i;

  if (scale == 0)
    return;

  for (i = 0; i < comb->n; i++)
    if (operand_equal_p (comb->elts[i], elt, 0))
      {
        comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
        if (comb->coefs[i])
          return;

        comb->n--;
        comb->coefs[i] = comb->coefs[comb->n];
        comb->elts[i] = comb->elts[comb->n];

        if (comb->rest)
          {
            gcc_assert (comb->n == MAX_AFF_ELTS - 1);
            comb->coefs[comb->n] = 1;
            comb->elts[comb->n] = comb->rest;
            comb->rest = NULL_TREE;
            comb->n++;
          }
        return;
      }

  if (comb->n < MAX_AFF_ELTS)
    {
      comb->coefs[comb->n] = scale;
      comb->elts[comb->n] = elt;
      comb->n++;
      return;
    }

  if (scale == 1)
    elt = fold_convert (comb->type, elt);
  else
    elt = fold_build2 (MULT_EXPR, comb->type,
                       fold_convert (comb->type, elt),
                       build_int_cst_type (comb->type, scale));

  if (comb->rest)
    comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
  else
    comb->rest = elt;
}
/* Adds COMB2 to COMB1.  */

static void
aff_combination_add (struct affine_tree_combination *comb1,
                     struct affine_tree_combination *comb2)
{
  unsigned i;

  comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
  for (i = 0; i < comb2->n; i++)
    aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
  if (comb2->rest)
    aff_combination_add_elt (comb1, comb2->rest, 1);
}
/* Splits EXPR into an affine combination of parts.  */

static void
tree_to_aff_combination (tree expr, tree type,
                         struct affine_tree_combination *comb)
{
  struct affine_tree_combination tmp;
  enum tree_code code;
  tree cst, core, toffset;
  HOST_WIDE_INT bitpos, bitsize;
  enum machine_mode mode;
  int unsignedp, volatilep;

  STRIP_NOPS (expr);

  code = TREE_CODE (expr);
  switch (code)
    {
    case INTEGER_CST:
      aff_combination_const (comb, type, int_cst_value (expr));
      return;

    case PLUS_EXPR:
    case MINUS_EXPR:
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
      if (code == MINUS_EXPR)
        aff_combination_scale (&tmp, -1);
      aff_combination_add (comb, &tmp);
      return;

    case MULT_EXPR:
      cst = TREE_OPERAND (expr, 1);
      if (TREE_CODE (cst) != INTEGER_CST)
        break;
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      aff_combination_scale (comb, int_cst_value (cst));
      return;

    case NEGATE_EXPR:
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      aff_combination_scale (comb, -1);
      return;

    case ADDR_EXPR:
      core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
                                  &toffset, &mode, &unsignedp, &volatilep,
                                  false);
      if (bitpos % BITS_PER_UNIT != 0)
        break;
      aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
      core = build_fold_addr_expr (core);
      if (TREE_CODE (core) == ADDR_EXPR)
        aff_combination_add_elt (comb, core, 1);
      else
        {
          tree_to_aff_combination (core, type, &tmp);
          aff_combination_add (comb, &tmp);
        }
      if (toffset)
        {
          tree_to_aff_combination (toffset, type, &tmp);
          aff_combination_add (comb, &tmp);
        }
      return;

    default:
      break;
    }

  aff_combination_elt (comb, type, expr);
}
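/* An illustrative decomposition (not from the original comments): the
   expression a + 4 * b + 7 is split into offset 7 and two elements,
   a with coefficient 1 and b with coefficient 4.  An expression the switch
   above cannot decompose further ends up as a single element with
   coefficient 1 via aff_combination_elt.  */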
/* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */

static tree
add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
                 unsigned HOST_WIDE_INT mask)
{
  enum tree_code code;

  scale &= mask;
  elt = fold_convert (type, elt);

  if (scale == 1)
    {
      if (!expr)
        return elt;

      return fold_build2 (PLUS_EXPR, type, expr, elt);
    }

  if (scale == mask)
    {
      if (!expr)
        return fold_build1 (NEGATE_EXPR, type, elt);

      return fold_build2 (MINUS_EXPR, type, expr, elt);
    }

  if (!expr)
    return fold_build2 (MULT_EXPR, type, elt,
                        build_int_cst_type (type, scale));

  if ((scale | (mask >> 1)) == mask)
    {
      /* Scale is negative.  */
      code = MINUS_EXPR;
      scale = (-scale) & mask;
    }
  else
    code = PLUS_EXPR;

  elt = fold_build2 (MULT_EXPR, type, elt,
                     build_int_cst_type (type, scale));
  return fold_build2 (code, type, expr, elt);
}
/* Copies the tree elements of COMB to ensure that they are not shared.  */

static void
unshare_aff_combination (struct affine_tree_combination *comb)
{
  unsigned i;

  for (i = 0; i < comb->n; i++)
    comb->elts[i] = unshare_expr (comb->elts[i]);
  if (comb->rest)
    comb->rest = unshare_expr (comb->rest);
}
/* Makes tree from the affine combination COMB.  */

static tree
aff_combination_to_tree (struct affine_tree_combination *comb)
{
  tree type = comb->type;
  tree expr = comb->rest;
  unsigned i;
  unsigned HOST_WIDE_INT off, sgn;

  /* Handle the special case produced by get_computation_aff when
     the type does not fit in HOST_WIDE_INT.  */
  if (comb->n == 0 && comb->offset == 0)
    return fold_convert (type, expr);

  gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);

  for (i = 0; i < comb->n; i++)
    expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
                            comb->mask);

  if ((comb->offset | (comb->mask >> 1)) == comb->mask)
    {
      /* Offset is negative.  */
      off = (-comb->offset) & comb->mask;
      sgn = comb->mask;
    }
  else
    {
      off = comb->offset;
      sgn = 1;
    }
  return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
                          comb->mask);
}
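/* The sign of the offset is recovered from the mask: with a 32-bit mask of
   0xffffffff, an offset of 0xfffffffc has all the high bits of the mask set,
   so it is treated as -4 and emitted as a subtraction rather than as an
   addition of a huge unsigned constant.  (Illustrative values.)  */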
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, tree at,
                     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree uutype;
  tree expr, delta;
  tree ratio;
  unsigned HOST_WIDE_INT ustepi, cstepi;
  HOST_WIDE_INT ratioi;
  struct affine_tree_combination cbase_aff, expr_aff;
  tree cstep_orig = cstep, ustep_orig = ustep;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  expr = var_at_stmt (loop, cand, at);

  if (TREE_TYPE (expr) != ctype)
    {
      /* This may happen with the original ivs.  */
      expr = fold_convert (ctype, expr);
    }

  if (TYPE_UNSIGNED (utype))
    uutype = utype;
  else
    {
      uutype = unsigned_type_for (utype);
      ubase = fold_convert (uutype, ubase);
      ustep = fold_convert (uutype, ustep);
    }

  if (uutype != ctype)
    {
      expr = fold_convert (uutype, expr);
      cbase = fold_convert (uutype, cbase);
      cstep = fold_convert (uutype, cstep);

      /* If the conversion is not noop, we must take it into account when
         considering the value of the step.  */
      if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
        cstep_orig = cstep;
    }

  if (cst_and_fits_in_hwi (cstep_orig)
      && cst_and_fits_in_hwi (ustep_orig))
    {
      ustepi = int_cst_value (ustep_orig);
      cstepi = int_cst_value (cstep_orig);

      if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
        {
          /* TODO maybe consider case when ustep divides cstep and the ratio is
             a power of 2 (so that the division is fast to execute)?  We would
             need to be much more careful with overflows etc. then.  */
          return false;
        }

      ratio = build_int_cst_type (uutype, ratioi);
    }
  else
    {
      ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
      if (!ratio)
        return false;

      /* Ratioi is only used to detect special cases when the multiplicative
         factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
         we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
         to integer_onep/integer_all_onesp, since the former ignores
         TREE_OVERFLOW.  */
      if (cst_and_fits_in_hwi (ratio))
        ratioi = int_cst_value (ratio);
      else if (integer_onep (ratio))
        ratioi = 1;
      else if (integer_all_onesp (ratio))
        ratioi = -1;
      else
        ratioi = 0;
    }

  /* We may need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);

  /* use = ubase - ratio * cbase + ratio * var.

     In general case ubase + ratio * (var - cbase) could be better (one less
     multiplication), but often it is possible to eliminate redundant parts
     of computations from (ubase - ratio * cbase) term, and if it does not
     happen, fold is able to apply the distributive law to obtain this form
     anyway.  */
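  /* As a concrete illustration (hypothetical values): if the use is the byte
     offset 4 * i with ubase 0 and ustep 4, and the candidate is i itself
     with cbase 0 and cstep 1, then ratio = 4 and the formula above yields
     use = 0 - 4 * 0 + 4 * var, i.e. simply 4 * var.  */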
  if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
    {
      /* Let's compute in trees and just return the result in AFF.  This case
         should not be very common, and fold itself is not that bad either,
         so making the aff. functions more complicated to handle this case
         is not that urgent.  */
      if (ratioi == 1)
        {
          delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
          expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
        }
      else if (ratioi == -1)
        {
          delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
          expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
        }
      else
        {
          delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
          delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
          expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
          expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
        }

      aff_combination_const (aff, uutype, 0);
      aff->rest = expr;
      return true;
    }

  /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
     possible to compute ratioi.  */
  gcc_assert (ratioi);

  tree_to_aff_combination (ubase, uutype, aff);
  tree_to_aff_combination (cbase, uutype, &cbase_aff);
  tree_to_aff_combination (expr, uutype, &expr_aff);
  aff_combination_scale (&cbase_aff, -ratioi);
  aff_combination_scale (&expr_aff, ratioi);
  aff_combination_add (aff, &cbase_aff);
  aff_combination_add (aff, &expr_aff);

  return true;
}
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
                    struct iv_use *use, struct iv_cand *cand, tree at)
{
  struct affine_tree_combination aff;
  tree type = TREE_TYPE (use->iv->base);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}
/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
/* Returns cost of addition in MODE.  */

static unsigned
add_cost (enum machine_mode mode)
{
  static unsigned costs[NUM_MACHINE_MODES];
  rtx seq;
  unsigned cost;

  if (costs[mode])
    return costs[mode];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
                 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  if (!cost)
    cost = 1;

  costs[mode] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
             GET_MODE_NAME (mode), cost);
  return cost;
}
/* Entry in a hashtable of already known costs for multiplication.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;            /* The constant to multiply by.  */
  enum machine_mode mode;       /* In mode.  */
  unsigned cost;                /* The cost.  */
};

/* Counts hash value for the ENTRY.  */

static hashval_t
mbc_entry_hash (const void *entry)
{
  const struct mbc_entry *e = entry;

  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
}
/* Compares the hash table entries ENTRY1 and ENTRY2.  */

static int
mbc_entry_eq (const void *entry1, const void *entry2)
{
  const struct mbc_entry *e1 = entry1;
  const struct mbc_entry *e2 = entry2;

  return (e1->mode == e2->mode
          && e1->cst == e2->cst);
}
/* Returns cost of multiplication by constant CST in MODE.  */

static unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  *cached = xmalloc (sizeof (struct mbc_entry));
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
               gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
             (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}
/* Returns true if multiplying by RATIO is allowed in address.  */

static bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
{
#define MAX_RATIO 128
  static sbitmap valid_mult;

  if (!valid_mult)
    {
      rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
        {
          XEXP (addr, 1) = gen_int_mode (i, Pmode);
          if (memory_address_p (Pmode, addr))
            SET_BIT (valid_mult, i + MAX_RATIO);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "  allowed multipliers:");
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
            if (TEST_BIT (valid_mult, i + MAX_RATIO))
              fprintf (dump_file, " %d", (int) i);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
}
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  The created memory accesses MODE.

   TODO -- there must be some better way.  This all is quite crude.  */
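/* For instance (illustrative only), the access *(p + 16 + 4 * i) has no
   symbol, the variable p present, OFFSET 16 and RATIO 4; a reference to a
   global array g[i] would instead have the symbol present, with RATIO given
   by the element size.  */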
static unsigned
get_address_cost (bool symbol_present, bool var_present,
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
{
  static bool initialized = false;
  static HOST_WIDE_INT rat, off;
  static HOST_WIDE_INT min_offset, max_offset;
  static unsigned costs[2][2][2][2];
  unsigned cost, acost;
  rtx seq, addr, base;
  bool offset_p, ratio_p;
  rtx reg1;
  HOST_WIDE_INT s_offset;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (!initialized)
    {
      HOST_WIDE_INT i;
      initialized = true;

      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);

      addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
      for (i = 1; i <= 1 << 20; i <<= 1)
        {
          XEXP (addr, 1) = gen_int_mode (i, Pmode);
          if (!memory_address_p (Pmode, addr))
            break;
        }
      max_offset = i >> 1;
      off = max_offset;

      for (i = 1; i <= 1 << 20; i <<= 1)
        {
          XEXP (addr, 1) = gen_int_mode (-i, Pmode);
          if (!memory_address_p (Pmode, addr))
            break;
        }
      min_offset = -(i >> 1);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "get_address_cost:\n");
          fprintf (dump_file, "  min offset %d\n", (int) min_offset);
          fprintf (dump_file, "  max offset %d\n", (int) max_offset);
        }

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
        if (multiplier_allowed_in_address_p (i))
          {
            rat = i;
            break;
          }
    }

  bits = GET_MODE_BITSIZE (Pmode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  cost = 0;
  offset_p = (s_offset != 0
              && min_offset <= s_offset && s_offset <= max_offset);
  ratio_p = (ratio != 1
             && multiplier_allowed_in_address_p (ratio));

  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, Pmode);

  if (s_offset && !offset_p && !symbol_present)
    {
      cost += add_cost (Pmode);
      var_present = true;
    }

  acost = costs[symbol_present][var_present][offset_p][ratio_p];
  if (!acost)
    {
      int old_cse_not_expected;
      acost = 0;

      addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
      if (ratio_p)
        addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));

      if (var_present)
        addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);

      if (symbol_present)
        {
          base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
          if (offset_p)
            base = gen_rtx_fmt_e (CONST, Pmode,
                                  gen_rtx_fmt_ee (PLUS, Pmode,
                                                  base,
                                                  gen_int_mode (off, Pmode)));
        }
      else if (offset_p)
        base = gen_int_mode (off, Pmode);
      else
        base = NULL_RTX;

      if (base)
        addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);

      start_sequence ();
      /* To avoid splitting addressing modes, pretend that no cse will
         follow.  */
      old_cse_not_expected = cse_not_expected;
      cse_not_expected = true;
      addr = memory_address (Pmode, addr);
      cse_not_expected = old_cse_not_expected;
      seq = get_insns ();
      end_sequence ();

      acost = seq_cost (seq);
      acost += address_cost (addr, Pmode);

      if (!acost)
        acost = 1;
      costs[symbol_present][var_present][offset_p][ratio_p] = acost;
    }

  return cost + acost;
}
/* Estimates cost of forcing expression EXPR into a variable.  */

static unsigned
force_expr_to_var_cost (tree expr)
{
  static bool costs_initialized = false;
  static unsigned integer_cost;
  static unsigned symbol_cost;
  static unsigned address_cost;
  tree op0, op1;
  unsigned cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree var = create_tmp_var_raw (integer_type_node, "test_var");
      rtx x = gen_rtx_MEM (DECL_MODE (var),
                           gen_rtx_SYMBOL_REF (Pmode, "test_var"));
      tree addr;
      tree type = build_pointer_type (integer_type_node);

      integer_cost = computation_cost (build_int_cst_type (integer_type_node,
                                                           2000));

      SET_DECL_RTL (var, x);
      TREE_STATIC (var) = 1;
      addr = build1 (ADDR_EXPR, type, var);
      symbol_cost = computation_cost (addr) + 1;

      address_cost
        = computation_cost (build2 (PLUS_EXPR, type,
                                    addr,
                                    build_int_cst_type (type, 2000))) + 1;
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "force_expr_to_var_cost:\n");
          fprintf (dump_file, "  integer %d\n", (int) integer_cost);
          fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
          fprintf (dump_file, "  address %d\n", (int) address_cost);
          fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
          fprintf (dump_file, "\n");
        }

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return 0;

  if (TREE_INVARIANT (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
        return integer_cost;

      if (TREE_CODE (expr) == ADDR_EXPR)
        {
          tree obj = TREE_OPERAND (expr, 0);

          if (TREE_CODE (obj) == VAR_DECL
              || TREE_CODE (obj) == PARM_DECL
              || TREE_CODE (obj) == RESULT_DECL)
            return symbol_cost;
        }

      return address_cost;
    }

  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
        cost0 = 0;
      else
        cost0 = force_expr_to_var_cost (op0);

      if (is_gimple_val (op1))
        cost1 = 0;
      else
        cost1 = force_expr_to_var_cost (op1);

      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return target_spill_cost;
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      cost = add_cost (mode);
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
        cost = multiply_by_cost (int_cst_value (op0), mode);
      else if (cst_and_fits_in_hwi (op1))
        cost = multiply_by_cost (int_cst_value (op1), mode);
      else
        return target_spill_cost;
      break;

    default:
      gcc_unreachable ();
    }

  cost += cost0;
  cost += cost1;

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  return cost < target_spill_cost ? cost : target_spill_cost;
}
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static unsigned
force_var_cost (struct ivopts_data *data,
                tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static unsigned
split_address_cost (struct ivopts_data *data,
                    tree addr, bool *symbol_present, bool *var_present,
                    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return target_spill_cost;
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return 0;
    }

  *symbol_present = false;
  *var_present = true;
  return 0;
}
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static unsigned
ptr_difference_cost (struct ivopts_data *data,
                     tree e1, tree e2, bool *symbol_present, bool *var_present,
                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  unsigned cost;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return 0;
    }

  if (e2 == integer_zero_node)
    return split_address_cost (data, TREE_OPERAND (e1, 0),
                               symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  cost = force_var_cost (data, e1, depends_on);
  cost += force_var_cost (data, e2, depends_on);
  cost += add_cost (Pmode);

  return cost;
}
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static unsigned
difference_cost (struct ivopts_data *data,
                 tree e1, tree e2, bool *symbol_present, bool *var_present,
                 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  unsigned cost;
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
                                offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return 0;
    }
  *var_present = true;
  if (zero_p (e2))
    return force_var_cost (data, e1, depends_on);

  if (zero_p (e1))
    {
      cost = force_var_cost (data, e2, depends_on);
      cost += multiply_by_cost (-1, mode);

      return cost;
    }

  cost = force_var_cost (data, e1, depends_on);
  cost += force_var_cost (data, e2, depends_on);
  cost += add_cost (mode);

  return cost;
}
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.  */

static unsigned
get_computation_cost_at (struct ivopts_data *data,
                         struct iv_use *use, struct iv_cand *cand,
                         bool address_p, bitmap *depends_on, tree at)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present;
  unsigned cost = 0, n_sums;

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return INFTY;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return INFTY;
    }

  if (address_p)
    {
      /* Do not try to express address of an object with computation based
         on address of a different object.  This may cause problems in rtl
         level alias analysis (that does not expect this to be happening,
         as this is illegal in C), and would be unlikely to be useful
         anyway.  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return INFTY;
    }

  if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (cst_and_fits_in_hwi (ustep)
      && cst_and_fits_in_hwi (cstep))
    {
      ustepi = int_cst_value (ustep);

      if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
        return INFTY;
    }
  else
    {
      tree rat;

      rat = constant_multiple_of (utype, ustep, cstep);
      if (!rat)
        return INFTY;

      if (cst_and_fits_in_hwi (rat))
        ratio = int_cst_value (rat);
      else if (integer_onep (rat))
        ratio = 1;
      else if (integer_all_onesp (rat))
        ratio = -1;
      else
        return INFTY;
    }

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost += difference_cost (data,
                               ubase, integer_zero_node,
                               &symbol_present, &var_present, &offset,
                               depends_on);
    }
  else if (ratio == 1)
    {
      cost += difference_cost (data,
                               ubase, cbase,
                               &symbol_present, &var_present, &offset,
                               depends_on);
    }
  else
    {
      cost += force_var_cost (data, cbase, depends_on);
      cost += add_cost (TYPE_MODE (ctype));
      cost += difference_cost (data,
                               ubase, integer_zero_node,
                               &symbol_present, &var_present, &offset,
                               depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_after_increment (data->current_loop, cand, at))
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return cost + get_address_cost (symbol_present, var_present, offset,
                                    ratio);

  /* Otherwise estimate the costs for computing the expression.  */
  aratio = ratio > 0 ? ratio : -ratio;
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
        cost += multiply_by_cost (ratio, TYPE_MODE (ctype));

      return cost;
    }

  if (aratio != 1)
    cost += multiply_by_cost (aratio, TYPE_MODE (ctype));

  n_sums = 1;
  if (var_present
      /* Symbol + offset should be compile-time computable.  */
      && (symbol_present || offset))
    n_sums++;

  return cost + n_sums * add_cost (TYPE_MODE (ctype));

fallback:
  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return INFTY;

    if (address_p)
      comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);

    return computation_cost (comp);
  }
}
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  */

static unsigned
get_computation_cost (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand,
                      bool address_p, bitmap *depends_on)
{
  return get_computation_cost_at (data,
                                  use, cand, address_p, depends_on, use->stmt);
}
/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  unsigned cost;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on);
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return cost != INFTY;
}
/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return cost != INFTY;
}
/* Computes value of induction variable IV in iteration NITER.  */

static tree
iv_value (struct iv *iv, tree niter)
{
  tree val;
  tree type = TREE_TYPE (iv->base);

  niter = fold_convert (type, niter);
  val = fold_build2 (MULT_EXPR, type, iv->step, niter);

  return fold_build2 (PLUS_EXPR, type, iv->base, val);
}
/* Computes value of candidate CAND at position AT in iteration NITER.  */

static tree
cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
{
  tree val = iv_value (cand->iv, niter);
  tree type = TREE_TYPE (cand->iv->base);

  if (stmt_after_increment (loop, cand, at))
    val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);

  return val;
}
/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  /* Period of the iv is gcd (step, type range).  Since type range is power
     of two, it suffices to determine the maximum power of two that divides
     the step.  */
  pow2div = num_ending_zeros (step);
  type = unsigned_type_for (TREE_TYPE (step));

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_low_cst (pow2div, 1)));

  return period;
}
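/* Example (not from the original comments): for a 32-bit iv with step 4,
   num_ending_zeros is 2, so the period is the 30-bit mask 2^30 - 1 -- the
   iv assumes 2^30 distinct values before returning to its initial one.  */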
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = bb_for_stmt (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound)
{
  basic_block ex_bb;
  edge exit;
  struct tree_niter_desc *niter;
  tree nit, nit_type;
  tree wider_type, period, per_type;
  struct loop *loop = data->current_loop;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.  TODO -- extend
     for other conditions inside loop body.  */
  ex_bb = bb_for_stmt (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || TREE_CODE (use->stmt) != COND_EXPR)
    return false;
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  niter = niter_for_exit (data, exit);
  if (!niter
      || !zero_p (niter->may_be_zero))
    return false;

  nit = niter->niter;
  nit_type = TREE_TYPE (nit);

  /* Determine whether we may use the variable to test whether niter iterations
     elapsed.  This is the case iff the period of the induction variable is
     greater than the number of iterations.  */
  period = iv_period (cand->iv);
  if (!period)
    return false;
  per_type = TREE_TYPE (period);

  wider_type = TREE_TYPE (period);
  if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
    wider_type = per_type;
  else
    wider_type = nit_type;

  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
                                      fold_convert (wider_type, period),
                                      fold_convert (wider_type, nit))))
    return false;

  *bound = cand_value_at (loop, cand, use->stmt, nit);
  return true;
}
/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE, op, cond;
  bitmap depends_on = NULL;
  unsigned cost;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
      return false;
    }

  if (may_eliminate_iv (data, use, cand, &bound))
    {
      cost = force_var_cost (data, bound, &depends_on);

      set_use_iv_cost (data, use, cand, cost, depends_on, bound);
      return cost != INFTY;
    }

  /* The induction variable elimination failed; just express the original
     giv.  If it is compared with an invariant, note that we cannot get
     rid of it.  */
  cost = get_computation_cost (data, use, cand, false, &depends_on);

  cond = *use->op_p;
  if (TREE_CODE (cond) != SSA_NAME)
    {
      op = TREE_OPERAND (cond, 0);
      if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
        op = TREE_OPERAND (cond, 1);
      if (TREE_CODE (op) == SSA_NAME)
        {
          op = get_iv (data, op)->base;
          fd_ivopts_data = data;
          walk_tree (&op, find_depends, &depends_on, NULL);
        }
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
  return cost != INFTY;
}
/* Checks whether it is possible to replace the final value of USE by
   a direct computation.  If so, the formula is stored to *VALUE.  */

static bool
may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
                         tree *value)
{
  struct loop *loop = data->current_loop;
  edge exit;
  struct tree_niter_desc *niter;

  exit = single_dom_exit (loop);
  if (!exit)
    return false;

  gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
                              bb_for_stmt (use->stmt)));

  niter = niter_for_single_dom_exit (data);
  if (!niter
      || !zero_p (niter->may_be_zero))
    return false;

  *value = iv_value (use->iv, niter->niter);

  return true;
}
/* Determines cost of replacing final value of USE using CAND.  */

static bool
determine_use_iv_cost_outer (struct ivopts_data *data,
                             struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  unsigned cost;
  edge exit;
  tree value = NULL_TREE;
  struct loop *loop = data->current_loop;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
      return true;
    }

  if (!cand->iv)
    {
      if (!may_replace_final_value (data, use, &value))
        {
          set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
          return false;
        }

      depends_on = NULL;
      cost = force_var_cost (data, value, &depends_on);

      cost /= AVG_LOOP_NITER (loop);

      set_use_iv_cost (data, use, cand, cost, depends_on, value);
      return cost != INFTY;
    }

  exit = single_dom_exit (loop);
  if (exit)
    {
      /* If there is just a single exit, we may use value of the candidate
         after we take it to determine the value of use.  */
      cost = get_computation_cost_at (data, use, cand, false, &depends_on,
                                      last_stmt (exit->src));
      if (cost != INFTY)
        cost /= AVG_LOOP_NITER (loop);
    }
  else
    {
      /* Otherwise we just need to compute the iv.  */
      cost = get_computation_cost (data, use, cand, false, &depends_on);
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return cost != INFTY;
}
/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_OUTER:
      return determine_use_iv_cost_outer (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
        {
          for (j = 0; j < n_iv_cands (data); j++)
            {
              cand = iv_cand (data, j);
              determine_use_iv_cost (data, use, cand);
            }
        }
      else
        {
          bitmap_iterator bi;

          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              cand = iv_cand (data, j);
              if (!determine_use_iv_cost (data, use, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for that the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (use->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);

          fprintf (dump_file, "Use %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tdepends on\n");
          for (j = 0; j < use->n_map_members; j++)
            {
              if (!use->cost_map[j].cand
                  || use->cost_map[j].cost == INFTY)
                continue;

              fprintf (dump_file, "  %d\t%d\t",
                       use->cost_map[j].cand->id,
                       use->cost_map[j].cost);
              if (use->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              use->cost_map[j].depends_on, "","");
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }
}
/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  unsigned cost_base, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));

  cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);

  /* Prefer the original iv unless we may gain something by replacing it;
     this is not really relevant for artificial ivs created by other
     optimization passes.  */
  if (cand->pos == IP_ORIGINAL
      && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cand->cost--;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cand->cost++;
}
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}
/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  return global_cost_for_size (size,
                               loop_data (data->current_loop)->regs_used,
                               n_iv_uses (data));
}
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  tree phi, op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  /* We use the following model (definitely improvable, especially the
     cost function -- TODO):

     We estimate the number of registers available (using MD data), name it A.

     We estimate the number of registers used by the loop, name it U.  This
     number is obtained as the number of loop phi nodes (not counting virtual
     registers and bivs) + the number of variables from outside of the loop.

     We set a reserve R (free regs that are used for temporary computations,
     etc.).  For now the reserve is a constant 3.

     Let I be the number of induction variables.

     -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
        make a lot of ivs without a reason).
     -- if A - R < U + I <= A, the cost is I * PRES_COST
     -- if U + I > A, the cost is I * PRES_COST and
        number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
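  /* A worked instance of the model (hypothetical numbers): with A = 8
     available registers, reserve R = 3 and U = 4 registers used by the
     loop, a set of I = 1 ivs satisfies U + I + R <= A and is charged
     1 * SMALL_COST; I = 2 gives U + I + R = 9 > A but U + I = 6 <= A,
     so the charge is 2 * PRES_COST; from I = 5 on, U + I exceeds A and
     the spill term is added as well.  */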
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
      fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
    }

  n = 0;
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  loop_data (loop)->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  if (!a)
    return false;

  if (!b)
    return true;

  if (a->cost < b->cost)
    return true;

  if (a->cost > b->cost)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}
/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned cost = 0;

  cost += ivs->cand_use_cost;
  cost += ivs->cand_cost;
  cost += ivopts_global_cost_for_size (data, ivs->n_regs);

  ivs->cost = cost;
}
/* Remove invariants in set INVS to set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}
/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost -= cp->cost;

  iv_ca_set_remove_invariants (ivs, cp->depends_on);
  iv_ca_recount_cost (data, ivs);
}
/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}
/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->n_cands++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost += cp->cost;
      iv_ca_set_add_invariants (ivs, cp->depends_on);
      iv_ca_recount_cost (data, ivs);
    }
}
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cp = get_use_iv_cost (data, use, iv_cand (data, i));

      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}
/* Get cost for assignment IVS.  */

static unsigned
iv_ca_cost (struct iv_ca *ivs)
{
  return (ivs->bad_uses ? INFTY : ivs->cost);
}
/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}
/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}
/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}
/* Returns candidate by that USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}
/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}
/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}
/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}
/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
  nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = 0;
  nw->cand_cost = 0;
  nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
  nw->cost = 0;

  return nw;
}
/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free (*ivs);
  *ivs = NULL;
}
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;

  fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
  bitmap_print (file, ivs->cands, "  candidates ","\n");

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }
  fprintf (file, "\n");
}
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  */

static unsigned
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs)
{
  unsigned i, cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static unsigned
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  unsigned cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;
              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;
              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return INFTY;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static unsigned
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i, best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (acost < best_cost)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use)
{
  unsigned best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates.  Only if it fails, try the specific ones.
     Rationale -- in loops with many variables the best choice often is to use
     just one generic biv.  If we added here many ivs specific to the uses,
     the optimization algorithm later would be likely to get stuck in a local
     minimum, thus causing us to create too many ivs.  The approach from
     few ivs to more seems more likely to be successful -- starting from few
     ivs, replacing an expensive use by a specific iv should always be a
     win.  */
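  /* For illustration (a hypothetical loop): with uses a[4*i], b[8*i] and the
     exit test i < n, a single important candidate with step 1 can express
     all three uses, whereas starting from per-use candidates with steps 4
     and 8 would pay a separate increment for each and tends to trap the
     search in that shape.  */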
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (act_cost < best_cost)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (best_cost == INFTY)
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            continue;

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (act_cost < best_cost)
            {
              best_cost = act_cost;

              iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return (best_cost != INFTY);
}
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}
5165 /* Tries to improve set of induction variables IVS. */
5168 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5170 unsigned i
, acost
, best_cost
= iv_ca_cost (ivs
), n_ivs
;
5171 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
5172 struct iv_cand
*cand
;
  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (acost < best_cost)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }
  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}
/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;
  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }
  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  block_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = bsi_last (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = bsi_last (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }
}
/* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
   is true, remove also the ssa name defined by the statement.  */

static void
remove_statement (tree stmt, bool including_defined_name)
{
  if (TREE_CODE (stmt) == PHI_NODE)
    {
      if (!including_defined_name)
        {
          /* Prevent the ssa name defined by the statement from being
             removed.  */
          SET_PHI_RESULT (stmt, NULL);
        }
      remove_phi_node (stmt, NULL_TREE);
    }
  else
    {
      block_stmt_iterator bsi = bsi_for_stmt (stmt);

      bsi_remove (&bsi);
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, stmts, tgt, ass;
  block_stmt_iterator bsi, pbsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
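  /* E.g. (an illustrative sketch, not from the original sources): for the
     original biv increment

         i_7 = i_3 + 1;

     with cand->var_before == i_3 and cand->var_after == i_7, the statement
     already computes the candidate's value, so the code below returns
     without emitting anything.  */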
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR;

      gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
      gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
        {
          incr_code = MINUS_EXPR;
          step = TREE_OPERAND (step, 0);
        }

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      op = TREE_OPERAND (use->stmt, 1);
      if (TREE_CODE (op) == PLUS_EXPR
          || TREE_CODE (op) == MINUS_EXPR)
        {
          if (TREE_OPERAND (op, 0) == cand->var_before)
            op = TREE_OPERAND (op, 1);
          else if (TREE_CODE (op) == PLUS_EXPR
                   && TREE_OPERAND (op, 1) == cand->var_before)
            op = TREE_OPERAND (op, 0);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op
          && (TREE_CODE (op) == INTEGER_CST
              || operand_equal_p (op, step, 0)))
        return;
      /* Otherwise, add the necessary computations to express
         the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
                           build2 (incr_code, ctype, op,
                                   unshare_expr (step)));
    }
  else
    comp = get_computation (data->current_loop, use, cand);
  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
      while (!bsi_end_p (pbsi)
             && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
        {
          bsi = pbsi;
          bsi_next (&pbsi);
        }
      break;

    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }
  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      if (stmts)
        bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
      ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
      bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    {
      if (stmts)
        bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
      TREE_OPERAND (use->stmt, 1) = op;
    }
}
/* Replaces ssa name in index IDX by its basic variable.  Callback for
   for_each_index.  */

static bool
idx_remove_ssa_names (tree base, tree *idx,
                      void *data ATTRIBUTE_UNUSED)
{
  tree *op;

  if (TREE_CODE (*idx) == SSA_NAME)
    *idx = SSA_NAME_VAR (*idx);

  if (TREE_CODE (base) == ARRAY_REF)
    {
      op = &TREE_OPERAND (base, 2);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
      op = &TREE_OPERAND (base, 3);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
    }

  return true;
}
/* Unshares REF and replaces ssa names inside it by their basic variables.  */

static tree
unshare_and_remove_ssa_names (tree ref)
{
  ref = unshare_expr (ref);
  for_each_index (&ref, idx_remove_ssa_names, NULL);

  return ref;
}
/* Extract the alias analysis info for the memory reference REF.  There are
   several ways in which this information may be stored, and what precisely
   its semantics is depends on the type of the reference, but there is
   always one _DECL node hidden somewhere that is used to determine the set
   of virtual operands for the reference.  The code below deciphers this
   jungle and extracts this single useful piece of information.  */
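/* For instance (an illustrative sketch, not from the original sources):
   for a reference *p_1 the tag is the name mem tag of p_1 if it has one,
   and the type mem tag of the base pointer otherwise; for a reference such
   as x.a[i].b it is the type mem tag of x if present, and the _DECL node
   of x itself otherwise.  */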
static tree
get_ref_tag (tree ref)
{
  tree var = get_base_address (ref);
  tree tag;

  if (!var)
    return NULL_TREE;

  if (TREE_CODE (var) == INDIRECT_REF)
    {
      /* In case the base is a dereference of a pointer, first check its name
         mem tag, and if it does not have one, use type mem tag.  */
      var = TREE_OPERAND (var, 0);
      if (TREE_CODE (var) != SSA_NAME)
        return NULL_TREE;

      if (SSA_NAME_PTR_INFO (var))
        {
          tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
          if (tag)
            return tag;
        }

      var = SSA_NAME_VAR (var);
      tag = var_ann (var)->type_mem_tag;
      gcc_assert (tag != NULL_TREE);
      return tag;
    }
  else
    {
      if (!DECL_P (var))
        return NULL_TREE;

      tag = var_ann (var)->type_mem_tag;
      if (tag)
        return tag;

      return var;
    }
}
/* Copies the reference information from OLD_REF to NEW_REF.  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
    copy_mem_ref_info (new_ref, old_ref);
  else
    {
      TMR_TAG (new_ref) = get_ref_tag (old_ref);
      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
    }
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  struct affine_tree_combination aff;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  tree ref;

  get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  unshare_aff_combination (&aff);

  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
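/* An illustration (a hedged sketch, not part of the original sources):
   given a candidate pointer iv, an access a[i] inside the loop may be
   replaced here by a TARGET_MEM_REF that encodes the base, index, step and
   offset of the affine combination directly, so that it can later be
   matched against the target's addressing modes; copy_ref_info carries the
   alias information over to the new reference.  */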
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree *op_p, cond, op, stmts, bound;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);

      compare = iv_elimination_compare (data, use);
      bound = fold_convert (var_type, bound);
      op = force_gimple_operand (unshare_expr (bound), &stmts,
                                 true, NULL_TREE);

      if (stmts)
        bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

      *use->op_p = build2 (compare, boolean_type_node, var, op);
      update_stmt (use->stmt);
      return;
    }
  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);

  cond = *use->op_p;
  op_p = &TREE_OPERAND (cond, 0);
  if (TREE_CODE (*op_p) != SSA_NAME
      || zero_p (get_iv (data, *op_p)->step))
    op_p = &TREE_OPERAND (cond, 1);

  op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
  if (stmts)
    bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

  *op_p = op;
}
/* Ensure that operand *OP_P may be used at the end of EXIT without
   violating loop closed ssa form.  */

static void
protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
{
  basic_block def_bb;
  struct loop *def_loop;
  tree phi, use;

  use = USE_FROM_PTR (op_p);
  if (TREE_CODE (use) != SSA_NAME)
    return;

  def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
  if (!def_bb)
    return;

  def_loop = def_bb->loop_father;
  if (flow_bb_inside_loop_p (def_loop, exit->dest))
    return;

  /* Try finding a phi node that copies the value out of the loop.  */
  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
      break;

  if (!phi)
    {
      /* Create such a phi node.  */
      tree new_name = duplicate_ssa_name (use, NULL);

      phi = create_phi_node (new_name, exit->dest);
      SSA_NAME_DEF_STMT (new_name) = phi;
      add_phi_arg (phi, use, exit);
    }

  SET_USE (op_p, PHI_RESULT (phi));
}
/* Ensure that operands of STMT may be used at the end of EXIT without
   violating loop closed ssa form.  */

static void
protect_loop_closed_ssa_form (edge exit, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;

  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
    protect_loop_closed_ssa_form_use (exit, use_p);
}
/* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange
   things so that they are emitted on the correct place, and so that the loop
   closed ssa form is preserved.  */

static void
compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
{
  tree_stmt_iterator tsi;
  block_stmt_iterator bsi;
  tree phi, stmt, def, next;

  if (!single_pred_p (exit->dest))
    split_loop_exit_edge (exit);

  /* Ensure there is label in exit->dest, so that we can
     insert after it.  */
  tree_block_label (exit->dest);
  bsi = bsi_after_labels (exit->dest);

  if (TREE_CODE (stmts) == STATEMENT_LIST)
    {
      for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
        {
          bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
          protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
        }
    }
  else
    {
      bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
      protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
    }

  if (!op)
    return;

  for (phi = phi_nodes (exit->dest); phi; phi = next)
    {
      next = PHI_CHAIN (phi);

      if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
        {
          def = PHI_RESULT (phi);
          remove_statement (phi, false);
          stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
                         def, op);
          SSA_NAME_DEF_STMT (def) = stmt;
          bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
        }
    }
}
/* Rewrites the final value of USE (that is only needed outside of the loop)
   using candidate CAND.  */

static void
rewrite_use_outer (struct ivopts_data *data,
                   struct iv_use *use, struct iv_cand *cand)
{
  edge exit;
  tree value, op, stmts, tgt;
  tree phi;

  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);
      break;
    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      break;
    default:
      gcc_unreachable ();
    }

  exit = single_dom_exit (data->current_loop);

  if (exit)
    {
      if (!cand->iv)
        {
          struct cost_pair *cp = get_use_iv_cost (data, use, cand);

          value = unshare_expr (cp->value);
        }
      else
        value = get_computation_at (data->current_loop,
                                    use, cand, last_stmt (exit->src));

      op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
      /* If we will preserve the iv anyway and we would need to perform
         some computation to replace the final value, do nothing.  */
      if (stmts && name_info (data, tgt)->preserve_biv)
        return;

      for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
        {
          use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);

          if (USE_FROM_PTR (use_p) == tgt)
            SET_USE (use_p, op);
        }

      if (stmts)
        compute_phi_arg_on_exit (exit, stmts, op);

      /* Enable removal of the statement.  We cannot remove it directly,
         since we may still need the aliasing information attached to the
         ssa name defined by it.  */
      name_info (data, tgt)->iv->have_use_for = false;
      return;
    }
  /* If the variable is going to be preserved anyway, there is nothing to
     do.  */
  if (name_info (data, tgt)->preserve_biv)
    return;

  /* Otherwise we just need to compute the iv.  */
  rewrite_use_nonlinear_expr (data, use, cand);
}
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data,
             struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_OUTER:
      rewrite_use_outer (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !zero_p (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
    }
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  htab_empty (data->niters);

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
        free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
        free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = xcalloc (data->version_info_size,
                                    sizeof (struct version_info));
    }

  data->max_inv_id = 0;

  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}
/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree.  */

static void
tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
{
  unsigned i;

  for (i = 1; i < loops->num; i++)
    if (loops->parray[i])
      {
        free (loops->parray[i]->aux);
        loops->parray[i]->aux = NULL;
      }

  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);
  htab_delete (data->niters);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  data->current_loop = loop;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in LOOPS.  */

void
tree_ssa_iv_optimize (struct loops *loops)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (loops, &data);

  /* Optimize the loops starting with the innermost ones.  */
  loop = loops->tree_root;
  while (loop->inner)
    loop = loop->inner;

  /* Scan the loops, inner ones first.  */
  while (loop != loops->tree_root)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);

      if (loop->next)
        {
          loop = loop->next;
          while (loop->inner)
            loop = loop->inner;
        }
      else
        loop = loop->outer;
    }

  tree_ssa_iv_optimize_finalize (loops, &data);
}