gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "tree.h"
  69 #include "rtl.h"
  70 #include "tm_p.h"
  71 #include "hard-reg-set.h"
  72 #include "basic-block.h"
  73 #include "output.h"
  74 #include "diagnostic.h"
  75 #include "tree-flow.h"
  76 #include "tree-dump.h"
  77 #include "timevar.h"
  78 #include "cfgloop.h"
  79 #include "varray.h"
  80 #include "expr.h"
  81 #include "tree-pass.h"
  82 #include "ggc.h"
  83 #include "insn-config.h"
  84 #include "recog.h"
  85 #include "hashtab.h"
  86 #include "tree-chrec.h"
  87 #include "tree-scalar-evolution.h"
  88 #include "cfgloop.h"
  89 #include "params.h"
  90 #include "langhooks.h"
  91
  92 /* The infinite cost.  */
  93 #define INFTY 10000000
  94
  95 /* The expected number of loop iterations.  TODO -- use profiling instead of
  96    this.  */
  97 #define AVG_LOOP_NITER(LOOP) 5
  98
  99
 100 /* Representation of the induction variable.  */
 101 struct iv
 102 {
 103   tree base;            /* Initial value of the iv.  */
 104   tree base_object;     /* A memory object to that the induction variable points.  */
 105   tree step;            /* Step of the iv (constant only).  */
 106   tree ssa_name;        /* The ssa name with the value.  */
 107   bool biv_p;           /* Is it a biv?  */
 108   bool have_use_for;    /* Do we already have a use for it?  */
 109   unsigned use_id;      /* The identifier in the use if it is the case.  */
 110 };
 111
 112 /* Per-ssa version information (induction variable descriptions, etc.).  */
 113 struct version_info
 114 {
 115   tree name;            /* The ssa name.  */
 116   struct iv *iv;        /* Induction variable description.  */
 117   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 118                            an expression that is not an induction variable.  */
 119   unsigned inv_id;      /* Id of an invariant.  */
 120   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 121 };
 122
 123 /* Types of uses.  */
 124 enum use_type
 125 {
 126   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 127   USE_ADDRESS,          /* Use in an address.  */
 128   USE_COMPARE           /* Use is a compare.  */
 129 };
 130
 131 /* The candidate - cost pair.  */
 132 struct cost_pair
 133 {
 134   struct iv_cand *cand; /* The candidate.  */
 135   unsigned cost;        /* The cost.  */
 136   bitmap depends_on;    /* The list of invariants that have to be
 137                            preserved.  */
 138   tree value;           /* For final value elimination, the expression for
 139                            the final value of the iv.  For iv elimination,
 140                            the new bound to compare with.  */
 141 };
 142
 143 /* Use.  */
 144 struct iv_use
 145 {
 146   unsigned id;          /* The id of the use.  */
 147   enum use_type type;   /* Type of the use.  */
 148   struct iv *iv;        /* The induction variable it is based on.  */
 149   tree stmt;            /* Statement in that it occurs.  */
 150   tree *op_p;           /* The place where it occurs.  */
 151   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 152                            important ones.  */
 153
 154   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 155   struct cost_pair *cost_map;
 156                         /* The costs wrto the iv candidates.  */
 157
 158   struct iv_cand *selected;
 159                         /* The selected candidate.  */
 160 };
 161
 162 /* The position where the iv is computed.  */
 163 enum iv_position
 164 {
 165   IP_NORMAL,            /* At the end, just before the exit condition.  */
 166   IP_END,               /* At the end of the latch block.  */
 167   IP_ORIGINAL           /* The original biv.  */
 168 };
 169
 170 /* The induction variable candidate.  */
 171 struct iv_cand
 172 {
 173   unsigned id;          /* The number of the candidate.  */
 174   bool important;       /* Whether this is an "important" candidate, i.e. such
 175                            that it should be considered by all uses.  */
 176   enum iv_position pos; /* Where it is computed.  */
 177   tree incremented_at;  /* For original biv, the statement where it is
 178                            incremented.  */
 179   tree var_before;      /* The variable used for it before increment.  */
 180   tree var_after;       /* The variable used for it after increment.  */
 181   struct iv *iv;        /* The value of the candidate.  NULL for
 182                            "pseudocandidate" used to indicate the possibility
 183                            to replace the final value of an iv by direct
 184                            computation of the value.  */
 185   unsigned cost;        /* Cost of the candidate.  */
 186   bitmap depends_on;    /* The list of invariants that are used in step of the
 187                            biv.  */
 188 };
 189
 190 /* The data used by the induction variable optimizations.  */
 191
 192 typedef struct iv_use *iv_use_p;
 193 DEF_VEC_P(iv_use_p);
 194 DEF_VEC_ALLOC_P(iv_use_p,heap);
 195
 196 typedef struct iv_cand *iv_cand_p;
 197 DEF_VEC_P(iv_cand_p);
 198 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 199
 200 struct ivopts_data
 201 {
 202   /* The currently optimized loop.  */
 203   struct loop *current_loop;
 204
 205   /* Number of registers used in it.  */
 206   unsigned regs_used;
 207
 208   /* Numbers of iterations for all exits of the current loop.  */
 209   htab_t niters;
 210
 211   /* The size of version_info array allocated.  */
 212   unsigned version_info_size;
 213
 214   /* The array of information for the ssa names.  */
 215   struct version_info *version_info;
 216
 217   /* The bitmap of indices in version_info whose value was changed.  */
 218   bitmap relevant;
 219
 220   /* The maximum invariant id.  */
 221   unsigned max_inv_id;
 222
 223   /* The uses of induction variables.  */
 224   VEC(iv_use_p,heap) *iv_uses;
 225
 226   /* The candidates.  */
 227   VEC(iv_cand_p,heap) *iv_candidates;
 228
 229   /* A bitmap of important candidates.  */
 230   bitmap important_candidates;
 231
 232   /* Whether to consider just related and important candidates when replacing a
 233      use.  */
 234   bool consider_all_candidates;
 235 };
 236
 237 /* An assignment of iv candidates to uses.  */
 238
 239 struct iv_ca
 240 {
 241   /* The number of uses covered by the assignment.  */
 242   unsigned upto;
 243
 244   /* Number of uses that cannot be expressed by the candidates in the set.  */
 245   unsigned bad_uses;
 246
 247   /* Candidate assigned to a use, together with the related costs.  */
 248   struct cost_pair **cand_for_use;
 249
 250   /* Number of times each candidate is used.  */
 251   unsigned *n_cand_uses;
 252
 253   /* The candidates used.  */
 254   bitmap cands;
 255
 256   /* The number of candidates in the set.  */
 257   unsigned n_cands;
 258
 259   /* Total number of registers needed.  */
 260   unsigned n_regs;
 261
 262   /* Total cost of expressing uses.  */
 263   unsigned cand_use_cost;
 264
 265   /* Total cost of candidates.  */
 266   unsigned cand_cost;
 267
 268   /* Number of times each invariant is used.  */
 269   unsigned *n_invariant_uses;
 270
 271   /* Total cost of the assignment.  */
 272   unsigned cost;
 273 };
 274
 275 /* Difference of two iv candidate assignments.  */
 276
 277 struct iv_ca_delta
 278 {
 279   /* Changed use.  */
 280   struct iv_use *use;
 281
 282   /* An old assignment (for rollback purposes).  */
 283   struct cost_pair *old_cp;
 284
 285   /* A new assignment.  */
 286   struct cost_pair *new_cp;
 287
 288   /* Next change in the list.  */
 289   struct iv_ca_delta *next_change;
 290 };
 291
 292 /* Bound on number of candidates below that all candidates are considered.  */
 293
 294 #define CONSIDER_ALL_CANDIDATES_BOUND \
 295   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 296
 297 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 298    optimizing such a loop would help, and it would take ages).  */
 299
 300 #define MAX_CONSIDERED_USES \
 301   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 302
 303 /* If there are at most this number of ivs in the set, try removing unnecessary
 304    ivs from the set always.  */
 305
 306 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 307   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 308
 309 /* The list of trees for that the decl_rtl field must be reset is stored
 310    here.  */
 311
 312 static VEC(tree,heap) *decl_rtl_to_reset;
 313
 314 /* Number of uses recorded in DATA.  */
 315
 316 static inline unsigned
 317 n_iv_uses (struct ivopts_data *data)
 318 {
 319   return VEC_length (iv_use_p, data->iv_uses);
 320 }
 321
 322 /* Ith use recorded in DATA.  */
 323
 324 static inline struct iv_use *
 325 iv_use (struct ivopts_data *data, unsigned i)
 326 {
 327   return VEC_index (iv_use_p, data->iv_uses, i);
 328 }
 329
 330 /* Number of candidates recorded in DATA.  */
 331
 332 static inline unsigned
 333 n_iv_cands (struct ivopts_data *data)
 334 {
 335   return VEC_length (iv_cand_p, data->iv_candidates);
 336 }
 337
 338 /* Ith candidate recorded in DATA.  */
 339
 340 static inline struct iv_cand *
 341 iv_cand (struct ivopts_data *data, unsigned i)
 342 {
 343   return VEC_index (iv_cand_p, data->iv_candidates, i);
 344 }
 345
 346 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 347
 348 edge
 349 single_dom_exit (struct loop *loop)
 350 {
 351   edge exit = loop->single_exit;
 352
 353   if (!exit)
 354     return NULL;
 355
 356   if (!just_once_each_iteration_p (loop, exit->src))
 357     return NULL;
 358
 359   return exit;
 360 }
 361
 362 /* Dumps information about the induction variable IV to FILE.  */
 363
 364 extern void dump_iv (FILE *, struct iv *);
 365 void
 366 dump_iv (FILE *file, struct iv *iv)
 367 {
 368   if (iv->ssa_name)
 369     {
 370       fprintf (file, "ssa name ");
 371       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 372       fprintf (file, "\n");
 373     }
 374
 375   fprintf (file, "  type ");
 376   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 377   fprintf (file, "\n");
 378
 379   if (iv->step)
 380     {
 381       fprintf (file, "  base ");
 382       print_generic_expr (file, iv->base, TDF_SLIM);
 383       fprintf (file, "\n");
 384
 385       fprintf (file, "  step ");
 386       print_generic_expr (file, iv->step, TDF_SLIM);
 387       fprintf (file, "\n");
 388     }
 389   else
 390     {
 391       fprintf (file, "  invariant ");
 392       print_generic_expr (file, iv->base, TDF_SLIM);
 393       fprintf (file, "\n");
 394     }
 395
 396   if (iv->base_object)
 397     {
 398       fprintf (file, "  base object ");
 399       print_generic_expr (file, iv->base_object, TDF_SLIM);
 400       fprintf (file, "\n");
 401     }
 402
 403   if (iv->biv_p)
 404     fprintf (file, "  is a biv\n");
 405 }
 406
 407 /* Dumps information about the USE to FILE.  */
 408
 409 extern void dump_use (FILE *, struct iv_use *);
 410 void
 411 dump_use (FILE *file, struct iv_use *use)
 412 {
 413   fprintf (file, "use %d\n", use->id);
 414
 415   switch (use->type)
 416     {
 417     case USE_NONLINEAR_EXPR:
 418       fprintf (file, "  generic\n");
 419       break;
 420
 421     case USE_ADDRESS:
 422       fprintf (file, "  address\n");
 423       break;
 424
 425     case USE_COMPARE:
 426       fprintf (file, "  compare\n");
 427       break;
 428
 429     default:
 430       gcc_unreachable ();
 431     }
 432
 433   fprintf (file, "  in statement ");
 434   print_generic_expr (file, use->stmt, TDF_SLIM);
 435   fprintf (file, "\n");
 436
 437   fprintf (file, "  at position ");
 438   if (use->op_p)
 439     print_generic_expr (file, *use->op_p, TDF_SLIM);
 440   fprintf (file, "\n");
 441
 442   dump_iv (file, use->iv);
 443
 444   if (use->related_cands)
 445     {
 446       fprintf (file, "  related candidates ");
 447       dump_bitmap (file, use->related_cands);
 448     }
 449 }
 450
 451 /* Dumps information about the uses to FILE.  */
 452
 453 extern void dump_uses (FILE *, struct ivopts_data *);
 454 void
 455 dump_uses (FILE *file, struct ivopts_data *data)
 456 {
 457   unsigned i;
 458   struct iv_use *use;
 459
 460   for (i = 0; i < n_iv_uses (data); i++)
 461     {
 462       use = iv_use (data, i);
 463
 464       dump_use (file, use);
 465       fprintf (file, "\n");
 466     }
 467 }
 468
 469 /* Dumps information about induction variable candidate CAND to FILE.  */
 470
 471 extern void dump_cand (FILE *, struct iv_cand *);
 472 void
 473 dump_cand (FILE *file, struct iv_cand *cand)
 474 {
 475   struct iv *iv = cand->iv;
 476
 477   fprintf (file, "candidate %d%s\n",
 478            cand->id, cand->important ? " (important)" : "");
 479
 480   if (cand->depends_on)
 481     {
 482       fprintf (file, "  depends on ");
 483       dump_bitmap (file, cand->depends_on);
 484     }
 485
 486   if (!iv)
 487     {
 488       fprintf (file, "  final value replacement\n");
 489       return;
 490     }
 491
 492   switch (cand->pos)
 493     {
 494     case IP_NORMAL:
 495       fprintf (file, "  incremented before exit test\n");
 496       break;
 497
 498     case IP_END:
 499       fprintf (file, "  incremented at end\n");
 500       break;
 501
 502     case IP_ORIGINAL:
 503       fprintf (file, "  original biv\n");
 504       break;
 505     }
 506
 507   dump_iv (file, iv);
 508 }
 509
 510 /* Returns the info for ssa version VER.  */
 511
 512 static inline struct version_info *
 513 ver_info (struct ivopts_data *data, unsigned ver)
 514 {
 515   return data->version_info + ver;
 516 }
 517
 518 /* Returns the info for ssa name NAME.  */
 519
 520 static inline struct version_info *
 521 name_info (struct ivopts_data *data, tree name)
 522 {
 523   return ver_info (data, SSA_NAME_VERSION (name));
 524 }
 525
 526 /* Checks whether there exists number X such that X * B = A, counting modulo
 527    2^BITS.  */
 528
 529 static bool
 530 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
 531         HOST_WIDE_INT *x)
 532 {
 533   unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
 534   unsigned HOST_WIDE_INT inv, ex, val;
 535   unsigned i;
 536
 537   a &= mask;
 538   b &= mask;
 539
 540   /* First divide the whole equation by 2 as long as possible.  */
 541   while (!(a & 1) && !(b & 1))
 542     {
 543       a >>= 1;
 544       b >>= 1;
 545       bits--;
 546       mask >>= 1;
 547     }
 548
 549   if (!(b & 1))
 550     {
 551       /* If b is still even, a is odd and there is no such x.  */
 552       return false;
 553     }
 554
 555   /* Find the inverse of b.  We compute it as
 556      b^(2^(bits - 1) - 1) (mod 2^bits).  */
 557   inv = 1;
 558   ex = b;
 559   for (i = 0; i < bits - 1; i++)
 560     {
 561       inv = (inv * ex) & mask;
 562       ex = (ex * ex) & mask;
 563     }
 564
 565   val = (a * inv) & mask;
 566
 567   gcc_assert (((val * b) & mask) == a);
 568
 569   if ((val >> (bits - 1)) & 1)
 570     val |= ~mask;
 571
 572   *x = val;
 573
 574   return true;
 575 }
 576
 577 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 578    emitted in LOOP.  */
 579
 580 static bool
 581 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 582 {
 583   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 584
 585   gcc_assert (bb);
 586
 587   if (sbb == loop->latch)
 588     return true;
 589
 590   if (sbb != bb)
 591     return false;
 592
 593   return stmt == last_stmt (bb);
 594 }
 595
 596 /* Returns true if STMT if after the place where the original induction
 597    variable CAND is incremented.  */
 598
 599 static bool
 600 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 601 {
 602   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 603   basic_block stmt_bb = bb_for_stmt (stmt);
 604   block_stmt_iterator bsi;
 605
 606   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 607     return false;
 608
 609   if (stmt_bb != cand_bb)
 610     return true;
 611
 612   /* Scan the block from the end, since the original ivs are usually
 613      incremented at the end of the loop body.  */
 614   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 615     {
 616       if (bsi_stmt (bsi) == cand->incremented_at)
 617         return false;
 618       if (bsi_stmt (bsi) == stmt)
 619         return true;
 620     }
 621 }
 622
 623 /* Returns true if STMT if after the place where the induction variable
 624    CAND is incremented in LOOP.  */
 625
 626 static bool
 627 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 628 {
 629   switch (cand->pos)
 630     {
 631     case IP_END:
 632       return false;
 633
 634     case IP_NORMAL:
 635       return stmt_after_ip_normal_pos (loop, stmt);
 636
 637     case IP_ORIGINAL:
 638       return stmt_after_ip_original_pos (cand, stmt);
 639
 640     default:
 641       gcc_unreachable ();
 642     }
 643 }
 644
 645 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 646
 647 static bool
 648 abnormal_ssa_name_p (tree exp)
 649 {
 650   if (!exp)
 651     return false;
 652
 653   if (TREE_CODE (exp) != SSA_NAME)
 654     return false;
 655
 656   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 657 }
 658
 659 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 660    abnormal phi node.  Callback for for_each_index.  */
 661
 662 static bool
 663 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 664                                   void *data ATTRIBUTE_UNUSED)
 665 {
 666   if (TREE_CODE (base) == ARRAY_REF)
 667     {
 668       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 669         return false;
 670       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 671         return false;
 672     }
 673
 674   return !abnormal_ssa_name_p (*index);
 675 }
 676
 677 /* Returns true if EXPR contains a ssa name that occurs in an
 678    abnormal phi node.  */
 679
 680 bool
 681 contains_abnormal_ssa_name_p (tree expr)
 682 {
 683   enum tree_code code;
 684   enum tree_code_class class;
 685
 686   if (!expr)
 687     return false;
 688
 689   code = TREE_CODE (expr);
 690   class = TREE_CODE_CLASS (code);
 691
 692   if (code == SSA_NAME)
 693     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 694
 695   if (code == INTEGER_CST
 696       || is_gimple_min_invariant (expr))
 697     return false;
 698
 699   if (code == ADDR_EXPR)
 700     return !for_each_index (&TREE_OPERAND (expr, 0),
 701                             idx_contains_abnormal_ssa_name_p,
 702                             NULL);
 703
 704   switch (class)
 705     {
 706     case tcc_binary:
 707     case tcc_comparison:
 708       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 709         return true;
 710
 711       /* Fallthru.  */
 712     case tcc_unary:
 713       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 714         return true;
 715
 716       break;
 717
 718     default:
 719       gcc_unreachable ();
 720     }
 721
 722   return false;
 723 }
 724
 725 /* Element of the table in that we cache the numbers of iterations obtained
 726    from exits of the loop.  */
 727
 728 struct nfe_cache_elt
 729 {
 730   /* The edge for that the number of iterations is cached.  */
 731   edge exit;
 732
 733   /* Number of iterations corresponding to this exit, or NULL if it cannot be
 734      determined.  */
 735   tree niter;
 736 };
 737
 738 /* Hash function for nfe_cache_elt E.  */
 739
 740 static hashval_t
 741 nfe_hash (const void *e)
 742 {
 743   const struct nfe_cache_elt *elt = e;
 744
 745   return htab_hash_pointer (elt->exit);
 746 }
 747
 748 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 749
 750 static int
 751 nfe_eq (const void *e1, const void *e2)
 752 {
 753   const struct nfe_cache_elt *elt1 = e1;
 754
 755   return elt1->exit == e2;
 756 }
 757
 758 /*  Returns tree describing number of iterations determined from
 759     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 760
 761 static tree
 762 niter_for_exit (struct ivopts_data *data, edge exit)
 763 {
 764   struct nfe_cache_elt *nfe_desc;
 765   struct tree_niter_desc desc;
 766   PTR *slot;
 767
 768   slot = htab_find_slot_with_hash (data->niters, exit,
 769                                    htab_hash_pointer (exit),
 770                                    INSERT);
 771
 772   if (!*slot)
 773     {
 774       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 775       nfe_desc->exit = exit;
 776
 777       /* Try to determine number of iterations.  We must know it
 778          unconditionally (i.e., without possibility of # of iterations
 779          being zero).  Also, we cannot safely work with ssa names that
 780          appear in phi nodes on abnormal edges, so that we do not create
 781          overlapping life ranges for them (PR 27283).  */
 782       if (number_of_iterations_exit (data->current_loop,
 783                                      exit, &desc, true)
 784           && zero_p (desc.may_be_zero)
 785           && !contains_abnormal_ssa_name_p (desc.niter))
 786         nfe_desc->niter = desc.niter;
 787       else
 788         nfe_desc->niter = NULL_TREE;
 789     }
 790   else
 791     nfe_desc = *slot;
 792
 793   return nfe_desc->niter;
 794 }
 795
 796 /* Returns tree describing number of iterations determined from
 797    single dominating exit of DATA->current_loop, or NULL if something
 798    goes wrong.  */
 799
 800 static tree
 801 niter_for_single_dom_exit (struct ivopts_data *data)
 802 {
 803   edge exit = single_dom_exit (data->current_loop);
 804
 805   if (!exit)
 806     return NULL;
 807
 808   return niter_for_exit (data, exit);
 809 }
 810
 811 /* Initializes data structures used by the iv optimization pass, stored
 812    in DATA.  */
 813
 814 static void
 815 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 816 {
 817   data->version_info_size = 2 * num_ssa_names;
 818   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 819   data->relevant = BITMAP_ALLOC (NULL);
 820   data->important_candidates = BITMAP_ALLOC (NULL);
 821   data->max_inv_id = 0;
 822   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 823   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 824   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 825   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 826 }
 827
 828 /* Returns a memory object to that EXPR points.  In case we are able to
 829    determine that it does not point to any such object, NULL is returned.  */
 830
 831 static tree
 832 determine_base_object (tree expr)
 833 {
 834   enum tree_code code = TREE_CODE (expr);
 835   tree base, obj, op0, op1;
 836
 837   /* If this is a pointer casted to any type, we need to determine
 838      the base object for the pointer; so handle conversions before
 839      throwing away non-pointer expressions.  */
 840   if (TREE_CODE (expr) == NOP_EXPR
 841       || TREE_CODE (expr) == CONVERT_EXPR)
 842     return determine_base_object (TREE_OPERAND (expr, 0));
 843
 844   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 845     return NULL_TREE;
 846
 847   switch (code)
 848     {
 849     case INTEGER_CST:
 850       return NULL_TREE;
 851
 852     case ADDR_EXPR:
 853       obj = TREE_OPERAND (expr, 0);
 854       base = get_base_address (obj);
 855
 856       if (!base)
 857         return expr;
 858
 859       if (TREE_CODE (base) == INDIRECT_REF)
 860         return determine_base_object (TREE_OPERAND (base, 0));
 861
 862       return fold_convert (ptr_type_node,
 863                            build_fold_addr_expr (base));
 864
 865     case PLUS_EXPR:
 866     case MINUS_EXPR:
 867       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 868       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 869
 870       if (!op1)
 871         return op0;
 872
 873       if (!op0)
 874         return (code == PLUS_EXPR
 875                 ? op1
 876                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 877
 878       return fold_build2 (code, ptr_type_node, op0, op1);
 879
 880     default:
 881       return fold_convert (ptr_type_node, expr);
 882     }
 883 }
 884
 885 /* Allocates an induction variable with given initial value BASE and step STEP
 886    for loop LOOP.  */
 887
 888 static struct iv *
 889 alloc_iv (tree base, tree step)
 890 {
 891   struct iv *iv = XCNEW (struct iv);
 892
 893   if (step && integer_zerop (step))
 894     step = NULL_TREE;
 895
 896   iv->base = base;
 897   iv->base_object = determine_base_object (base);
 898   iv->step = step;
 899   iv->biv_p = false;
 900   iv->have_use_for = false;
 901   iv->use_id = 0;
 902   iv->ssa_name = NULL_TREE;
 903
 904   return iv;
 905 }
 906
 907 /* Sets STEP and BASE for induction variable IV.  */
 908
 909 static void
 910 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 911 {
 912   struct version_info *info = name_info (data, iv);
 913
 914   gcc_assert (!info->iv);
 915
 916   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 917   info->iv = alloc_iv (base, step);
 918   info->iv->ssa_name = iv;
 919 }
 920
 921 /* Finds induction variable declaration for VAR.  */
 922
 923 static struct iv *
 924 get_iv (struct ivopts_data *data, tree var)
 925 {
 926   basic_block bb;
 927
 928   if (!name_info (data, var)->iv)
 929     {
 930       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 931
 932       if (!bb
 933           || !flow_bb_inside_loop_p (data->current_loop, bb))
 934         set_iv (data, var, var, NULL_TREE);
 935     }
 936
 937   return name_info (data, var)->iv;
 938 }
 939
 940 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 941    not define a simple affine biv with nonzero step.  */
 942
 943 static tree
 944 determine_biv_step (tree phi)
 945 {
 946   struct loop *loop = bb_for_stmt (phi)->loop_father;
 947   tree name = PHI_RESULT (phi);
 948   affine_iv iv;
 949
 950   if (!is_gimple_reg (name))
 951     return NULL_TREE;
 952
 953   if (!simple_iv (loop, phi, name, &iv, true))
 954     return NULL_TREE;
 955
 956   return (zero_p (iv.step) ? NULL_TREE : iv.step);
 957 }
 958
 959 /* Finds basic ivs.  */
 960
 961 static bool
 962 find_bivs (struct ivopts_data *data)
 963 {
 964   tree phi, step, type, base;
 965   bool found = false;
 966   struct loop *loop = data->current_loop;
 967
 968   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 969     {
 970       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 971         continue;
 972
 973       step = determine_biv_step (phi);
 974       if (!step)
 975         continue;
 976
 977       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 978       base = expand_simple_operations (base);
 979       if (contains_abnormal_ssa_name_p (base)
 980           || contains_abnormal_ssa_name_p (step))
 981         continue;
 982
 983       type = TREE_TYPE (PHI_RESULT (phi));
 984       base = fold_convert (type, base);
 985       if (step)
 986         step = fold_convert (type, step);
 987
 988       set_iv (data, PHI_RESULT (phi), base, step);
 989       found = true;
 990     }
 991
 992   return found;
 993 }
 994
 995 /* Marks basic ivs.  */
 996
 997 static void
 998 mark_bivs (struct ivopts_data *data)
 999 {
1000   tree phi, var;
1001   struct iv *iv, *incr_iv;
1002   struct loop *loop = data->current_loop;
1003   basic_block incr_bb;
1004
1005   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1006     {
1007       iv = get_iv (data, PHI_RESULT (phi));
1008       if (!iv)
1009         continue;
1010
1011       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1012       incr_iv = get_iv (data, var);
1013       if (!incr_iv)
1014         continue;
1015
1016       /* If the increment is in the subloop, ignore it.  */
1017       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1018       if (incr_bb->loop_father != data->current_loop
1019           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1020         continue;
1021
1022       iv->biv_p = true;
1023       incr_iv->biv_p = true;
1024     }
1025 }
1026
1027 /* Checks whether STMT defines a linear induction variable and stores its
1028    parameters to IV.  */
1029
1030 static bool
1031 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
1032 {
1033   tree lhs;
1034   struct loop *loop = data->current_loop;
1035
1036   iv->base = NULL_TREE;
1037   iv->step = NULL_TREE;
1038
1039   if (TREE_CODE (stmt) != MODIFY_EXPR)
1040     return false;
1041
1042   lhs = TREE_OPERAND (stmt, 0);
1043   if (TREE_CODE (lhs) != SSA_NAME)
1044     return false;
1045
1046   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
1047     return false;
1048   iv->base = expand_simple_operations (iv->base);
1049
1050   if (contains_abnormal_ssa_name_p (iv->base)
1051       || contains_abnormal_ssa_name_p (iv->step))
1052     return false;
1053
1054   return true;
1055 }
1056
1057 /* Finds general ivs in statement STMT.  */
1058
1059 static void
1060 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1061 {
1062   affine_iv iv;
1063
1064   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1065     return;
1066
1067   set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
1068 }
1069
1070 /* Finds general ivs in basic block BB.  */
1071
1072 static void
1073 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1074 {
1075   block_stmt_iterator bsi;
1076
1077   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1078     find_givs_in_stmt (data, bsi_stmt (bsi));
1079 }
1080
1081 /* Finds general ivs.  */
1082
1083 static void
1084 find_givs (struct ivopts_data *data)
1085 {
1086   struct loop *loop = data->current_loop;
1087   basic_block *body = get_loop_body_in_dom_order (loop);
1088   unsigned i;
1089
1090   for (i = 0; i < loop->num_nodes; i++)
1091     find_givs_in_bb (data, body[i]);
1092   free (body);
1093 }
1094
1095 /* For each ssa name defined in LOOP determines whether it is an induction
1096    variable and if so, its initial value and step.  */
1097
1098 static bool
1099 find_induction_variables (struct ivopts_data *data)
1100 {
1101   unsigned i;
1102   bitmap_iterator bi;
1103
1104   if (!find_bivs (data))
1105     return false;
1106
1107   find_givs (data);
1108   mark_bivs (data);
1109
1110   if (dump_file && (dump_flags & TDF_DETAILS))
1111     {
1112       tree niter = niter_for_single_dom_exit (data);
1113
1114       if (niter)
1115         {
1116           fprintf (dump_file, "  number of iterations ");
1117           print_generic_expr (dump_file, niter, TDF_SLIM);
1118           fprintf (dump_file, "\n\n");
1119         };
1120
1121       fprintf (dump_file, "Induction variables:\n\n");
1122
1123       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1124         {
1125           if (ver_info (data, i)->iv)
1126             dump_iv (dump_file, ver_info (data, i)->iv);
1127         }
1128     }
1129
1130   return true;
1131 }
1132
1133 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1134
1135 static struct iv_use *
1136 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1137             tree stmt, enum use_type use_type)
1138 {
1139   struct iv_use *use = XCNEW (struct iv_use);
1140
1141   use->id = n_iv_uses (data);
1142   use->type = use_type;
1143   use->iv = iv;
1144   use->stmt = stmt;
1145   use->op_p = use_p;
1146   use->related_cands = BITMAP_ALLOC (NULL);
1147
1148   /* To avoid showing ssa name in the dumps, if it was not reset by the
1149      caller.  */
1150   iv->ssa_name = NULL_TREE;
1151
1152   if (dump_file && (dump_flags & TDF_DETAILS))
1153     dump_use (dump_file, use);
1154
1155   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1156
1157   return use;
1158 }
1159
1160 /* Checks whether OP is a loop-level invariant and if so, records it.
1161    NONLINEAR_USE is true if the invariant is used in a way we do not
1162    handle specially.  */
1163
1164 static void
1165 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1166 {
1167   basic_block bb;
1168   struct version_info *info;
1169
1170   if (TREE_CODE (op) != SSA_NAME
1171       || !is_gimple_reg (op))
1172     return;
1173
1174   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1175   if (bb
1176       && flow_bb_inside_loop_p (data->current_loop, bb))
1177     return;
1178
1179   info = name_info (data, op);
1180   info->name = op;
1181   info->has_nonlin_use |= nonlinear_use;
1182   if (!info->inv_id)
1183     info->inv_id = ++data->max_inv_id;
1184   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1185 }
1186
1187 /* Checks whether the use OP is interesting and if so, records it.  */
1188
1189 static struct iv_use *
1190 find_interesting_uses_op (struct ivopts_data *data, tree op)
1191 {
1192   struct iv *iv;
1193   struct iv *civ;
1194   tree stmt;
1195   struct iv_use *use;
1196
1197   if (TREE_CODE (op) != SSA_NAME)
1198     return NULL;
1199
1200   iv = get_iv (data, op);
1201   if (!iv)
1202     return NULL;
1203
1204   if (iv->have_use_for)
1205     {
1206       use = iv_use (data, iv->use_id);
1207
1208       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1209       return use;
1210     }
1211
1212   if (zero_p (iv->step))
1213     {
1214       record_invariant (data, op, true);
1215       return NULL;
1216     }
1217   iv->have_use_for = true;
1218
1219   civ = XNEW (struct iv);
1220   *civ = *iv;
1221
1222   stmt = SSA_NAME_DEF_STMT (op);
1223   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1224               || TREE_CODE (stmt) == MODIFY_EXPR);
1225
1226   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1227   iv->use_id = use->id;
1228
1229   return use;
1230 }
1231
1232 /* Checks whether the condition *COND_P in STMT is interesting
1233    and if so, records it.  */
1234
1235 static void
1236 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1237 {
1238   tree *op0_p;
1239   tree *op1_p;
1240   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1241   struct iv const_iv;
1242   tree zero = integer_zero_node;
1243
1244   const_iv.step = NULL_TREE;
1245
1246   if (TREE_CODE (*cond_p) != SSA_NAME
1247       && !COMPARISON_CLASS_P (*cond_p))
1248     return;
1249
1250   if (TREE_CODE (*cond_p) == SSA_NAME)
1251     {
1252       op0_p = cond_p;
1253       op1_p = &zero;
1254     }
1255   else
1256     {
1257       op0_p = &TREE_OPERAND (*cond_p, 0);
1258       op1_p = &TREE_OPERAND (*cond_p, 1);
1259     }
1260
1261   if (TREE_CODE (*op0_p) == SSA_NAME)
1262     iv0 = get_iv (data, *op0_p);
1263   else
1264     iv0 = &const_iv;
1265
1266   if (TREE_CODE (*op1_p) == SSA_NAME)
1267     iv1 = get_iv (data, *op1_p);
1268   else
1269     iv1 = &const_iv;
1270
1271   if (/* When comparing with non-invariant value, we may not do any senseful
1272          induction variable elimination.  */
1273       (!iv0 || !iv1)
1274       /* Eliminating condition based on two ivs would be nontrivial.
1275          ??? TODO -- it is not really important to handle this case.  */
1276       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1277     {
1278       find_interesting_uses_op (data, *op0_p);
1279       find_interesting_uses_op (data, *op1_p);
1280       return;
1281     }
1282
1283   if (zero_p (iv0->step) && zero_p (iv1->step))
1284     {
1285       /* If both are invariants, this is a work for unswitching.  */
1286       return;
1287     }
1288
1289   civ = XNEW (struct iv);
1290   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1291   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1292 }
1293
1294 /* Returns true if expression EXPR is obviously invariant in LOOP,
1295    i.e. if all its operands are defined outside of the LOOP.  */
1296
1297 bool
1298 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1299 {
1300   basic_block def_bb;
1301   unsigned i, len;
1302
1303   if (is_gimple_min_invariant (expr))
1304     return true;
1305
1306   if (TREE_CODE (expr) == SSA_NAME)
1307     {
1308       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1309       if (def_bb
1310           && flow_bb_inside_loop_p (loop, def_bb))
1311         return false;
1312
1313       return true;
1314     }
1315
1316   if (!EXPR_P (expr))
1317     return false;
1318
1319   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1320   for (i = 0; i < len; i++)
1321     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1322       return false;
1323
1324   return true;
1325 }
1326
1327 /* Cumulates the steps of indices into DATA and replaces their values with the
1328    initial ones.  Returns false when the value of the index cannot be determined.
1329    Callback for for_each_index.  */
1330
1331 struct ifs_ivopts_data
1332 {
1333   struct ivopts_data *ivopts_data;
1334   tree stmt;
1335   tree *step_p;
1336 };
1337
1338 static bool
1339 idx_find_step (tree base, tree *idx, void *data)
1340 {
1341   struct ifs_ivopts_data *dta = data;
1342   struct iv *iv;
1343   tree step, iv_base, iv_step, lbound, off;
1344   struct loop *loop = dta->ivopts_data->current_loop;
1345
1346   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1347       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1348     return false;
1349
1350   /* If base is a component ref, require that the offset of the reference
1351      be invariant.  */
1352   if (TREE_CODE (base) == COMPONENT_REF)
1353     {
1354       off = component_ref_field_offset (base);
1355       return expr_invariant_in_loop_p (loop, off);
1356     }
1357
1358   /* If base is array, first check whether we will be able to move the
1359      reference out of the loop (in order to take its address in strength
1360      reduction).  In order for this to work we need both lower bound
1361      and step to be loop invariants.  */
1362   if (TREE_CODE (base) == ARRAY_REF)
1363     {
1364       step = array_ref_element_size (base);
1365       lbound = array_ref_low_bound (base);
1366
1367       if (!expr_invariant_in_loop_p (loop, step)
1368           || !expr_invariant_in_loop_p (loop, lbound))
1369         return false;
1370     }
1371
1372   if (TREE_CODE (*idx) != SSA_NAME)
1373     return true;
1374
1375   iv = get_iv (dta->ivopts_data, *idx);
1376   if (!iv)
1377     return false;
1378
1379   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1380           *&x[0], which is not folded and does not trigger the
1381           ARRAY_REF path below.  */
1382   *idx = iv->base;
1383
1384   if (!iv->step)
1385     return true;
1386
1387   if (TREE_CODE (base) == ARRAY_REF)
1388     {
1389       step = array_ref_element_size (base);
1390
1391       /* We only handle addresses whose step is an integer constant.  */
1392       if (TREE_CODE (step) != INTEGER_CST)
1393         return false;
1394     }
1395   else
1396     /* The step for pointer arithmetics already is 1 byte.  */
1397     step = build_int_cst (sizetype, 1);
1398
1399   iv_base = iv->base;
1400   iv_step = iv->step;
1401   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1402                             sizetype, &iv_base, &iv_step, dta->stmt,
1403                             false))
1404     {
1405       /* The index might wrap.  */
1406       return false;
1407     }
1408
1409   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1410
1411   if (!*dta->step_p)
1412     *dta->step_p = step;
1413   else
1414     *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1415
1416   return true;
1417 }
1418
1419 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1420    object is passed to it in DATA.  */
1421
1422 static bool
1423 idx_record_use (tree base, tree *idx,
1424                 void *data)
1425 {
1426   find_interesting_uses_op (data, *idx);
1427   if (TREE_CODE (base) == ARRAY_REF)
1428     {
1429       find_interesting_uses_op (data, array_ref_element_size (base));
1430       find_interesting_uses_op (data, array_ref_low_bound (base));
1431     }
1432   return true;
1433 }
1434
1435 /* Returns true if memory reference REF may be unaligned.  */
1436
1437 static bool
1438 may_be_unaligned_p (tree ref)
1439 {
1440   tree base;
1441   tree base_type;
1442   HOST_WIDE_INT bitsize;
1443   HOST_WIDE_INT bitpos;
1444   tree toffset;
1445   enum machine_mode mode;
1446   int unsignedp, volatilep;
1447   unsigned base_align;
1448
1449   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1450      thus they are not misaligned.  */
1451   if (TREE_CODE (ref) == TARGET_MEM_REF)
1452     return false;
1453
1454   /* The test below is basically copy of what expr.c:normal_inner_ref
1455      does to check whether the object must be loaded by parts when
1456      STRICT_ALIGNMENT is true.  */
1457   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1458                               &unsignedp, &volatilep, true);
1459   base_type = TREE_TYPE (base);
1460   base_align = TYPE_ALIGN (base_type);
1461
1462   if (mode != BLKmode
1463       && (base_align < GET_MODE_ALIGNMENT (mode)
1464           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1465           || bitpos % BITS_PER_UNIT != 0))
1466     return true;
1467
1468   return false;
1469 }
1470
1471 /* Return true if EXPR may be non-addressable.   */
1472
1473 static bool
1474 may_be_nonaddressable_p (tree expr)
1475 {
1476   switch (TREE_CODE (expr))
1477     {
1478     case COMPONENT_REF:
1479       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1480              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1481
1482     case ARRAY_REF:
1483     case ARRAY_RANGE_REF:
1484       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1485
1486     case VIEW_CONVERT_EXPR:
1487       /* This kind of view-conversions may wrap non-addressable objects
1488          and make them look addressable.  After some processing the
1489          non-addressability may be uncovered again, causing ADDR_EXPRs
1490          of inappropriate objects to be built.  */
1491       return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1492              && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1493
1494     default:
1495       break;
1496     }
1497
1498   return false;
1499 }
1500
1501 /* Finds addresses in *OP_P inside STMT.  */
1502
1503 static void
1504 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1505 {
1506   tree base = *op_p, step = NULL;
1507   struct iv *civ;
1508   struct ifs_ivopts_data ifs_ivopts_data;
1509
1510   /* Do not play with volatile memory references.  A bit too conservative,
1511      perhaps, but safe.  */
1512   if (stmt_ann (stmt)->has_volatile_ops)
1513     goto fail;
1514
1515   /* Ignore bitfields for now.  Not really something terribly complicated
1516      to handle.  TODO.  */
1517   if (TREE_CODE (base) == BIT_FIELD_REF)
1518     goto fail;
1519
1520   if (may_be_nonaddressable_p (base))
1521     goto fail;
1522
1523   if (STRICT_ALIGNMENT
1524       && may_be_unaligned_p (base))
1525     goto fail;
1526
1527   base = unshare_expr (base);
1528
1529   if (TREE_CODE (base) == TARGET_MEM_REF)
1530     {
1531       tree type = build_pointer_type (TREE_TYPE (base));
1532       tree astep;
1533
1534       if (TMR_BASE (base)
1535           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1536         {
1537           civ = get_iv (data, TMR_BASE (base));
1538           if (!civ)
1539             goto fail;
1540
1541           TMR_BASE (base) = civ->base;
1542           step = civ->step;
1543         }
1544       if (TMR_INDEX (base)
1545           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1546         {
1547           civ = get_iv (data, TMR_INDEX (base));
1548           if (!civ)
1549             goto fail;
1550
1551           TMR_INDEX (base) = civ->base;
1552           astep = civ->step;
1553
1554           if (astep)
1555             {
1556               if (TMR_STEP (base))
1557                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1558
1559               if (step)
1560                 step = fold_build2 (PLUS_EXPR, type, step, astep);
1561               else
1562                 step = astep;
1563             }
1564         }
1565
1566       if (zero_p (step))
1567         goto fail;
1568       base = tree_mem_ref_addr (type, base);
1569     }
1570   else
1571     {
1572       ifs_ivopts_data.ivopts_data = data;
1573       ifs_ivopts_data.stmt = stmt;
1574       ifs_ivopts_data.step_p = &step;
1575       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1576           || zero_p (step))
1577         goto fail;
1578
1579       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1580       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1581
1582       base = build_fold_addr_expr (base);
1583
1584       /* Substituting bases of IVs into the base expression might
1585          have caused folding opportunities.  */
1586       if (TREE_CODE (base) == ADDR_EXPR)
1587         {
1588           tree *ref = &TREE_OPERAND (base, 0);
1589           while (handled_component_p (*ref))
1590             ref = &TREE_OPERAND (*ref, 0);
1591           if (TREE_CODE (*ref) == INDIRECT_REF)
1592             *ref = fold_indirect_ref (*ref);
1593         }
1594     }
1595
1596   civ = alloc_iv (base, step);
1597   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1598   return;
1599
1600 fail:
1601   for_each_index (op_p, idx_record_use, data);
1602 }
1603
1604 /* Finds and records invariants used in STMT.  */
1605
1606 static void
1607 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1608 {
1609   ssa_op_iter iter;
1610   use_operand_p use_p;
1611   tree op;
1612
1613   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1614     {
1615       op = USE_FROM_PTR (use_p);
1616       record_invariant (data, op, false);
1617     }
1618 }
1619
1620 /* Finds interesting uses of induction variables in the statement STMT.  */
1621
1622 static void
1623 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1624 {
1625   struct iv *iv;
1626   tree op, lhs, rhs;
1627   ssa_op_iter iter;
1628   use_operand_p use_p;
1629
1630   find_invariants_stmt (data, stmt);
1631
1632   if (TREE_CODE (stmt) == COND_EXPR)
1633     {
1634       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1635       return;
1636     }
1637
1638   if (TREE_CODE (stmt) == MODIFY_EXPR)
1639     {
1640       lhs = TREE_OPERAND (stmt, 0);
1641       rhs = TREE_OPERAND (stmt, 1);
1642
1643       if (TREE_CODE (lhs) == SSA_NAME)
1644         {
1645           /* If the statement defines an induction variable, the uses are not
1646              interesting by themselves.  */
1647
1648           iv = get_iv (data, lhs);
1649
1650           if (iv && !zero_p (iv->step))
1651             return;
1652         }
1653
1654       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1655         {
1656         case tcc_comparison:
1657           find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1658           return;
1659
1660         case tcc_reference:
1661           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1662           if (REFERENCE_CLASS_P (lhs))
1663             find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1664           return;
1665
1666         default: ;
1667         }
1668
1669       if (REFERENCE_CLASS_P (lhs)
1670           && is_gimple_val (rhs))
1671         {
1672           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1673           find_interesting_uses_op (data, rhs);
1674           return;
1675         }
1676
1677       /* TODO -- we should also handle address uses of type
1678
1679          memory = call (whatever);
1680
1681          and
1682
1683          call (memory).  */
1684     }
1685
1686   if (TREE_CODE (stmt) == PHI_NODE
1687       && bb_for_stmt (stmt) == data->current_loop->header)
1688     {
1689       lhs = PHI_RESULT (stmt);
1690       iv = get_iv (data, lhs);
1691
1692       if (iv && !zero_p (iv->step))
1693         return;
1694     }
1695
1696   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1697     {
1698       op = USE_FROM_PTR (use_p);
1699
1700       if (TREE_CODE (op) != SSA_NAME)
1701         continue;
1702
1703       iv = get_iv (data, op);
1704       if (!iv)
1705         continue;
1706
1707       find_interesting_uses_op (data, op);
1708     }
1709 }
1710
1711 /* Finds interesting uses of induction variables outside of loops
1712    on loop exit edge EXIT.  */
1713
1714 static void
1715 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1716 {
1717   tree phi, def;
1718
1719   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1720     {
1721       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1722       find_interesting_uses_op (data, def);
1723     }
1724 }
1725
1726 /* Finds uses of the induction variables that are interesting.  */
1727
1728 static void
1729 find_interesting_uses (struct ivopts_data *data)
1730 {
1731   basic_block bb;
1732   block_stmt_iterator bsi;
1733   tree phi;
1734   basic_block *body = get_loop_body (data->current_loop);
1735   unsigned i;
1736   struct version_info *info;
1737   edge e;
1738
1739   if (dump_file && (dump_flags & TDF_DETAILS))
1740     fprintf (dump_file, "Uses:\n\n");
1741
1742   for (i = 0; i < data->current_loop->num_nodes; i++)
1743     {
1744       edge_iterator ei;
1745       bb = body[i];
1746
1747       FOR_EACH_EDGE (e, ei, bb->succs)
1748         if (e->dest != EXIT_BLOCK_PTR
1749             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1750           find_interesting_uses_outside (data, e);
1751
1752       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1753         find_interesting_uses_stmt (data, phi);
1754       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1755         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1756     }
1757
1758   if (dump_file && (dump_flags & TDF_DETAILS))
1759     {
1760       bitmap_iterator bi;
1761
1762       fprintf (dump_file, "\n");
1763
1764       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1765         {
1766           info = ver_info (data, i);
1767           if (info->inv_id)
1768             {
1769               fprintf (dump_file, "  ");
1770               print_generic_expr (dump_file, info->name, TDF_SLIM);
1771               fprintf (dump_file, " is invariant (%d)%s\n",
1772                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1773             }
1774         }
1775
1776       fprintf (dump_file, "\n");
1777     }
1778
1779   free (body);
1780 }
1781
1782 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1783    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1784    we are at the top-level of the processed address.  */
1785
1786 static tree
1787 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1788                 unsigned HOST_WIDE_INT *offset)
1789 {
1790   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1791   enum tree_code code;
1792   tree type, orig_type = TREE_TYPE (expr);
1793   unsigned HOST_WIDE_INT off0, off1, st;
1794   tree orig_expr = expr;
1795
1796   STRIP_NOPS (expr);
1797
1798   type = TREE_TYPE (expr);
1799   code = TREE_CODE (expr);
1800   *offset = 0;
1801
1802   switch (code)
1803     {
1804     case INTEGER_CST:
1805       if (!cst_and_fits_in_hwi (expr)
1806           || zero_p (expr))
1807         return orig_expr;
1808
1809       *offset = int_cst_value (expr);
1810       return build_int_cst (orig_type, 0);
1811
1812     case PLUS_EXPR:
1813     case MINUS_EXPR:
1814       op0 = TREE_OPERAND (expr, 0);
1815       op1 = TREE_OPERAND (expr, 1);
1816
1817       op0 = strip_offset_1 (op0, false, false, &off0);
1818       op1 = strip_offset_1 (op1, false, false, &off1);
1819
1820       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1821       if (op0 == TREE_OPERAND (expr, 0)
1822           && op1 == TREE_OPERAND (expr, 1))
1823         return orig_expr;
1824
1825       if (zero_p (op1))
1826         expr = op0;
1827       else if (zero_p (op0))
1828         {
1829           if (code == PLUS_EXPR)
1830             expr = op1;
1831           else
1832             expr = fold_build1 (NEGATE_EXPR, type, op1);
1833         }
1834       else
1835         expr = fold_build2 (code, type, op0, op1);
1836
1837       return fold_convert (orig_type, expr);
1838
1839     case ARRAY_REF:
1840       if (!inside_addr)
1841         return orig_expr;
1842
1843       step = array_ref_element_size (expr);
1844       if (!cst_and_fits_in_hwi (step))
1845         break;
1846
1847       st = int_cst_value (step);
1848       op1 = TREE_OPERAND (expr, 1);
1849       op1 = strip_offset_1 (op1, false, false, &off1);
1850       *offset = off1 * st;
1851
1852       if (top_compref
1853           && zero_p (op1))
1854         {
1855           /* Strip the component reference completely.  */
1856           op0 = TREE_OPERAND (expr, 0);
1857           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1858           *offset += off0;
1859           return op0;
1860         }
1861       break;
1862
1863     case COMPONENT_REF:
1864       if (!inside_addr)
1865         return orig_expr;
1866
1867       tmp = component_ref_field_offset (expr);
1868       if (top_compref
1869           && cst_and_fits_in_hwi (tmp))
1870         {
1871           /* Strip the component reference completely.  */
1872           op0 = TREE_OPERAND (expr, 0);
1873           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1874           *offset = off0 + int_cst_value (tmp);
1875           return op0;
1876         }
1877       break;
1878
1879     case ADDR_EXPR:
1880       op0 = TREE_OPERAND (expr, 0);
1881       op0 = strip_offset_1 (op0, true, true, &off0);
1882       *offset += off0;
1883
1884       if (op0 == TREE_OPERAND (expr, 0))
1885         return orig_expr;
1886
1887       expr = build_fold_addr_expr (op0);
1888       return fold_convert (orig_type, expr);
1889
1890     case INDIRECT_REF:
1891       inside_addr = false;
1892       break;
1893
1894     default:
1895       return orig_expr;
1896     }
1897
1898   /* Default handling of expressions for that we want to recurse into
1899      the first operand.  */
1900   op0 = TREE_OPERAND (expr, 0);
1901   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1902   *offset += off0;
1903
1904   if (op0 == TREE_OPERAND (expr, 0)
1905       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1906     return orig_expr;
1907
1908   expr = copy_node (expr);
1909   TREE_OPERAND (expr, 0) = op0;
1910   if (op1)
1911     TREE_OPERAND (expr, 1) = op1;
1912
1913   /* Inside address, we might strip the top level component references,
1914      thus changing type of the expression.  Handling of ADDR_EXPR
1915      will fix that.  */
1916   expr = fold_convert (orig_type, expr);
1917
1918   return expr;
1919 }
1920
1921 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1922
1923 static tree
1924 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1925 {
1926   return strip_offset_1 (expr, false, false, offset);
1927 }
1928
1929 /* Returns variant of TYPE that can be used as base for different uses.
1930    We return unsigned type with the same precision, which avoids problems
1931    with overflows.  */
1932
1933 static tree
1934 generic_type_for (tree type)
1935 {
1936   if (POINTER_TYPE_P (type))
1937     return unsigned_type_for (type);
1938
1939   if (TYPE_UNSIGNED (type))
1940     return type;
1941
1942   return unsigned_type_for (type);
1943 }
1944
1945 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1946    the bitmap to that we should store it.  */
1947
1948 static struct ivopts_data *fd_ivopts_data;
1949 static tree
1950 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1951 {
1952   bitmap *depends_on = data;
1953   struct version_info *info;
1954
1955   if (TREE_CODE (*expr_p) != SSA_NAME)
1956     return NULL_TREE;
1957   info = name_info (fd_ivopts_data, *expr_p);
1958
1959   if (!info->inv_id || info->has_nonlin_use)
1960     return NULL_TREE;
1961
1962   if (!*depends_on)
1963     *depends_on = BITMAP_ALLOC (NULL);
1964   bitmap_set_bit (*depends_on, info->inv_id);
1965
1966   return NULL_TREE;
1967 }
1968
1969 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1970    position to POS.  If USE is not NULL, the candidate is set as related to
1971    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1972    replacement of the final value of the iv by a direct computation.  */
1973
1974 static struct iv_cand *
1975 add_candidate_1 (struct ivopts_data *data,
1976                  tree base, tree step, bool important, enum iv_position pos,
1977                  struct iv_use *use, tree incremented_at)
1978 {
1979   unsigned i;
1980   struct iv_cand *cand = NULL;
1981   tree type, orig_type;
1982
1983   if (base)
1984     {
1985       orig_type = TREE_TYPE (base);
1986       type = generic_type_for (orig_type);
1987       if (type != orig_type)
1988         {
1989           base = fold_convert (type, base);
1990           if (step)
1991             step = fold_convert (type, step);
1992         }
1993     }
1994
1995   for (i = 0; i < n_iv_cands (data); i++)
1996     {
1997       cand = iv_cand (data, i);
1998
1999       if (cand->pos != pos)
2000         continue;
2001
2002       if (cand->incremented_at != incremented_at)
2003         continue;
2004
2005       if (!cand->iv)
2006         {
2007           if (!base && !step)
2008             break;
2009
2010           continue;
2011         }
2012
2013       if (!base && !step)
2014         continue;
2015
2016       if (!operand_equal_p (base, cand->iv->base, 0))
2017         continue;
2018
2019       if (zero_p (cand->iv->step))
2020         {
2021           if (zero_p (step))
2022             break;
2023         }
2024       else
2025         {
2026           if (step && operand_equal_p (step, cand->iv->step, 0))
2027             break;
2028         }
2029     }
2030
2031   if (i == n_iv_cands (data))
2032     {
2033       cand = XCNEW (struct iv_cand);
2034       cand->id = i;
2035
2036       if (!base && !step)
2037         cand->iv = NULL;
2038       else
2039         cand->iv = alloc_iv (base, step);
2040
2041       cand->pos = pos;
2042       if (pos != IP_ORIGINAL && cand->iv)
2043         {
2044           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2045           cand->var_after = cand->var_before;
2046         }
2047       cand->important = important;
2048       cand->incremented_at = incremented_at;
2049       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2050
2051       if (step
2052           && TREE_CODE (step) != INTEGER_CST)
2053         {
2054           fd_ivopts_data = data;
2055           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2056         }
2057
2058       if (dump_file && (dump_flags & TDF_DETAILS))
2059         dump_cand (dump_file, cand);
2060     }
2061
2062   if (important && !cand->important)
2063     {
2064       cand->important = true;
2065       if (dump_file && (dump_flags & TDF_DETAILS))
2066         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2067     }
2068
2069   if (use)
2070     {
2071       bitmap_set_bit (use->related_cands, i);
2072       if (dump_file && (dump_flags & TDF_DETAILS))
2073         fprintf (dump_file, "Candidate %d is related to use %d\n",
2074                  cand->id, use->id);
2075     }
2076
2077   return cand;
2078 }
2079
2080 /* Returns true if incrementing the induction variable at the end of the LOOP
2081    is allowed.
2082
2083    The purpose is to avoid splitting latch edge with a biv increment, thus
2084    creating a jump, possibly confusing other optimization passes and leaving
2085    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2086    is not available (so we do not have a better alternative), or if the latch
2087    edge is already nonempty.  */
2088
2089 static bool
2090 allow_ip_end_pos_p (struct loop *loop)
2091 {
2092   if (!ip_normal_pos (loop))
2093     return true;
2094
2095   if (!empty_block_p (ip_end_pos (loop)))
2096     return true;
2097
2098   return false;
2099 }
2100
2101 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2102    position to POS.  If USE is not NULL, the candidate is set as related to
2103    it.  The candidate computation is scheduled on all available positions.  */
2104
2105 static void
2106 add_candidate (struct ivopts_data *data,
2107                tree base, tree step, bool important, struct iv_use *use)
2108 {
2109   if (ip_normal_pos (data->current_loop))
2110     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2111   if (ip_end_pos (data->current_loop)
2112       && allow_ip_end_pos_p (data->current_loop))
2113     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2114 }
2115
2116 /* Add a standard "0 + 1 * iteration" iv candidate for a
2117    type with SIZE bits.  */
2118
2119 static void
2120 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2121                                      unsigned int size)
2122 {
2123   tree type = lang_hooks.types.type_for_size (size, true);
2124   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2125                  true, NULL);
2126 }
2127
2128 /* Adds standard iv candidates.  */
2129
2130 static void
2131 add_standard_iv_candidates (struct ivopts_data *data)
2132 {
2133   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2134
2135   /* The same for a double-integer type if it is still fast enough.  */
2136   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2137     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2138 }
2139
2140
2141 /* Adds candidates bases on the old induction variable IV.  */
2142
2143 static void
2144 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2145 {
2146   tree phi, def;
2147   struct iv_cand *cand;
2148
2149   add_candidate (data, iv->base, iv->step, true, NULL);
2150
2151   /* The same, but with initial value zero.  */
2152   add_candidate (data,
2153                  build_int_cst (TREE_TYPE (iv->base), 0),
2154                  iv->step, true, NULL);
2155
2156   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2157   if (TREE_CODE (phi) == PHI_NODE)
2158     {
2159       /* Additionally record the possibility of leaving the original iv
2160          untouched.  */
2161       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2162       cand = add_candidate_1 (data,
2163                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2164                               SSA_NAME_DEF_STMT (def));
2165       cand->var_before = iv->ssa_name;
2166       cand->var_after = def;
2167     }
2168 }
2169
2170 /* Adds candidates based on the old induction variables.  */
2171
2172 static void
2173 add_old_ivs_candidates (struct ivopts_data *data)
2174 {
2175   unsigned i;
2176   struct iv *iv;
2177   bitmap_iterator bi;
2178
2179   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2180     {
2181       iv = ver_info (data, i)->iv;
2182       if (iv && iv->biv_p && !zero_p (iv->step))
2183         add_old_iv_candidates (data, iv);
2184     }
2185 }
2186
2187 /* Adds candidates based on the value of the induction variable IV and USE.  */
2188
2189 static void
2190 add_iv_value_candidates (struct ivopts_data *data,
2191                          struct iv *iv, struct iv_use *use)
2192 {
2193   unsigned HOST_WIDE_INT offset;
2194   tree base;
2195
2196   add_candidate (data, iv->base, iv->step, false, use);
2197
2198   /* The same, but with initial value zero.  Make such variable important,
2199      since it is generic enough so that possibly many uses may be based
2200      on it.  */
2201   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2202                  iv->step, true, use);
2203
2204   /* Third, try removing the constant offset.  */
2205   base = strip_offset (iv->base, &offset);
2206   if (offset)
2207     add_candidate (data, base, iv->step, false, use);
2208 }
2209
2210 /* Adds candidates based on the uses.  */
2211
2212 static void
2213 add_derived_ivs_candidates (struct ivopts_data *data)
2214 {
2215   unsigned i;
2216
2217   for (i = 0; i < n_iv_uses (data); i++)
2218     {
2219       struct iv_use *use = iv_use (data, i);
2220
2221       if (!use)
2222         continue;
2223
2224       switch (use->type)
2225         {
2226         case USE_NONLINEAR_EXPR:
2227         case USE_COMPARE:
2228         case USE_ADDRESS:
2229           /* Just add the ivs based on the value of the iv used here.  */
2230           add_iv_value_candidates (data, use->iv, use);
2231           break;
2232
2233         default:
2234           gcc_unreachable ();
2235         }
2236     }
2237 }
2238
2239 /* Record important candidates and add them to related_cands bitmaps
2240    if needed.  */
2241
2242 static void
2243 record_important_candidates (struct ivopts_data *data)
2244 {
2245   unsigned i;
2246   struct iv_use *use;
2247
2248   for (i = 0; i < n_iv_cands (data); i++)
2249     {
2250       struct iv_cand *cand = iv_cand (data, i);
2251
2252       if (cand->important)
2253         bitmap_set_bit (data->important_candidates, i);
2254     }
2255
2256   data->consider_all_candidates = (n_iv_cands (data)
2257                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2258
2259   if (data->consider_all_candidates)
2260     {
2261       /* We will not need "related_cands" bitmaps in this case,
2262          so release them to decrease peak memory consumption.  */
2263       for (i = 0; i < n_iv_uses (data); i++)
2264         {
2265           use = iv_use (data, i);
2266           BITMAP_FREE (use->related_cands);
2267         }
2268     }
2269   else
2270     {
2271       /* Add important candidates to the related_cands bitmaps.  */
2272       for (i = 0; i < n_iv_uses (data); i++)
2273         bitmap_ior_into (iv_use (data, i)->related_cands,
2274                          data->important_candidates);
2275     }
2276 }
2277
2278 /* Finds the candidates for the induction variables.  */
2279
2280 static void
2281 find_iv_candidates (struct ivopts_data *data)
2282 {
2283   /* Add commonly used ivs.  */
2284   add_standard_iv_candidates (data);
2285
2286   /* Add old induction variables.  */
2287   add_old_ivs_candidates (data);
2288
2289   /* Add induction variables derived from uses.  */
2290   add_derived_ivs_candidates (data);
2291
2292   /* Record the important candidates.  */
2293   record_important_candidates (data);
2294 }
2295
2296 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2297    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2298    we allocate a simple list to every use.  */
2299
2300 static void
2301 alloc_use_cost_map (struct ivopts_data *data)
2302 {
2303   unsigned i, size, s, j;
2304
2305   for (i = 0; i < n_iv_uses (data); i++)
2306     {
2307       struct iv_use *use = iv_use (data, i);
2308       bitmap_iterator bi;
2309
2310       if (data->consider_all_candidates)
2311         size = n_iv_cands (data);
2312       else
2313         {
2314           s = 0;
2315           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2316             {
2317               s++;
2318             }
2319
2320           /* Round up to the power of two, so that moduling by it is fast.  */
2321           for (size = 1; size < s; size <<= 1)
2322             continue;
2323         }
2324
2325       use->n_map_members = size;
2326       use->cost_map = XCNEWVEC (struct cost_pair, size);
2327     }
2328 }
2329
2330 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2331    on invariants DEPENDS_ON and that the value used in expressing it
2332    is VALUE.*/
2333
2334 static void
2335 set_use_iv_cost (struct ivopts_data *data,
2336                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2337                  bitmap depends_on, tree value)
2338 {
2339   unsigned i, s;
2340
2341   if (cost == INFTY)
2342     {
2343       BITMAP_FREE (depends_on);
2344       return;
2345     }
2346
2347   if (data->consider_all_candidates)
2348     {
2349       use->cost_map[cand->id].cand = cand;
2350       use->cost_map[cand->id].cost = cost;
2351       use->cost_map[cand->id].depends_on = depends_on;
2352       use->cost_map[cand->id].value = value;
2353       return;
2354     }
2355
2356   /* n_map_members is a power of two, so this computes modulo.  */
2357   s = cand->id & (use->n_map_members - 1);
2358   for (i = s; i < use->n_map_members; i++)
2359     if (!use->cost_map[i].cand)
2360       goto found;
2361   for (i = 0; i < s; i++)
2362     if (!use->cost_map[i].cand)
2363       goto found;
2364
2365   gcc_unreachable ();
2366
2367 found:
2368   use->cost_map[i].cand = cand;
2369   use->cost_map[i].cost = cost;
2370   use->cost_map[i].depends_on = depends_on;
2371   use->cost_map[i].value = value;
2372 }
2373
2374 /* Gets cost of (USE, CANDIDATE) pair.  */
2375
2376 static struct cost_pair *
2377 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2378                  struct iv_cand *cand)
2379 {
2380   unsigned i, s;
2381   struct cost_pair *ret;
2382
2383   if (!cand)
2384     return NULL;
2385
2386   if (data->consider_all_candidates)
2387     {
2388       ret = use->cost_map + cand->id;
2389       if (!ret->cand)
2390         return NULL;
2391
2392       return ret;
2393     }
2394
2395   /* n_map_members is a power of two, so this computes modulo.  */
2396   s = cand->id & (use->n_map_members - 1);
2397   for (i = s; i < use->n_map_members; i++)
2398     if (use->cost_map[i].cand == cand)
2399       return use->cost_map + i;
2400
2401   for (i = 0; i < s; i++)
2402     if (use->cost_map[i].cand == cand)
2403       return use->cost_map + i;
2404
2405   return NULL;
2406 }
2407
2408 /* Returns estimate on cost of computing SEQ.  */
2409
2410 static unsigned
2411 seq_cost (rtx seq)
2412 {
2413   unsigned cost = 0;
2414   rtx set;
2415
2416   for (; seq; seq = NEXT_INSN (seq))
2417     {
2418       set = single_set (seq);
2419       if (set)
2420         cost += rtx_cost (set, SET);
2421       else
2422         cost++;
2423     }
2424
2425   return cost;
2426 }
2427
2428 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2429 static rtx
2430 produce_memory_decl_rtl (tree obj, int *regno)
2431 {
2432   rtx x;
2433
2434   gcc_assert (obj);
2435   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2436     {
2437       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2438       x = gen_rtx_SYMBOL_REF (Pmode, name);
2439     }
2440   else
2441     x = gen_raw_REG (Pmode, (*regno)++);
2442
2443   return gen_rtx_MEM (DECL_MODE (obj), x);
2444 }
2445
2446 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2447    walk_tree.  DATA contains the actual fake register number.  */
2448
2449 static tree
2450 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2451 {
2452   tree obj = NULL_TREE;
2453   rtx x = NULL_RTX;
2454   int *regno = data;
2455
2456   switch (TREE_CODE (*expr_p))
2457     {
2458     case ADDR_EXPR:
2459       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2460            handled_component_p (*expr_p);
2461            expr_p = &TREE_OPERAND (*expr_p, 0))
2462         continue;
2463       obj = *expr_p;
2464       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2465         x = produce_memory_decl_rtl (obj, regno);
2466       break;
2467
2468     case SSA_NAME:
2469       *ws = 0;
2470       obj = SSA_NAME_VAR (*expr_p);
2471       if (!DECL_RTL_SET_P (obj))
2472         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2473       break;
2474
2475     case VAR_DECL:
2476     case PARM_DECL:
2477     case RESULT_DECL:
2478       *ws = 0;
2479       obj = *expr_p;
2480
2481       if (DECL_RTL_SET_P (obj))
2482         break;
2483
2484       if (DECL_MODE (obj) == BLKmode)
2485         x = produce_memory_decl_rtl (obj, regno);
2486       else
2487         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2488
2489       break;
2490
2491     default:
2492       break;
2493     }
2494
2495   if (x)
2496     {
2497       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2498       SET_DECL_RTL (obj, x);
2499     }
2500
2501   return NULL_TREE;
2502 }
2503
2504 /* Determines cost of the computation of EXPR.  */
2505
2506 static unsigned
2507 computation_cost (tree expr)
2508 {
2509   rtx seq, rslt;
2510   tree type = TREE_TYPE (expr);
2511   unsigned cost;
2512   /* Avoid using hard regs in ways which may be unsupported.  */
2513   int regno = LAST_VIRTUAL_REGISTER + 1;
2514
2515   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2516   start_sequence ();
2517   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2518   seq = get_insns ();
2519   end_sequence ();
2520
2521   cost = seq_cost (seq);
2522   if (MEM_P (rslt))
2523     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2524
2525   return cost;
2526 }
2527
2528 /* Returns variable containing the value of candidate CAND at statement AT.  */
2529
2530 static tree
2531 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2532 {
2533   if (stmt_after_increment (loop, cand, stmt))
2534     return cand->var_after;
2535   else
2536     return cand->var_before;
2537 }
2538
2539 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2540    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2541
2542 int
2543 tree_int_cst_sign_bit (tree t)
2544 {
2545   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2546   unsigned HOST_WIDE_INT w;
2547
2548   if (bitno < HOST_BITS_PER_WIDE_INT)
2549     w = TREE_INT_CST_LOW (t);
2550   else
2551     {
2552       w = TREE_INT_CST_HIGH (t);
2553       bitno -= HOST_BITS_PER_WIDE_INT;
2554     }
2555
2556   return (w >> bitno) & 1;
2557 }
2558
2559 /* If we can prove that TOP = cst * BOT for some constant cst,
2560    store cst to MUL and return true.  Otherwise return false.
2561    The returned value is always sign-extended, regardless of the
2562    signedness of TOP and BOT.  */
2563
2564 static bool
2565 constant_multiple_of (tree top, tree bot, double_int *mul)
2566 {
2567   tree mby;
2568   enum tree_code code;
2569   double_int res, p0, p1;
2570   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2571
2572   STRIP_NOPS (top);
2573   STRIP_NOPS (bot);
2574
2575   if (operand_equal_p (top, bot, 0))
2576     {
2577       *mul = double_int_one;
2578       return true;
2579     }
2580
2581   code = TREE_CODE (top);
2582   switch (code)
2583     {
2584     case MULT_EXPR:
2585       mby = TREE_OPERAND (top, 1);
2586       if (TREE_CODE (mby) != INTEGER_CST)
2587         return false;
2588
2589       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2590         return false;
2591
2592       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2593                               precision);
2594       return true;
2595
2596     case PLUS_EXPR:
2597     case MINUS_EXPR:
2598       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2599           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2600         return false;
2601
2602       if (code == MINUS_EXPR)
2603         p1 = double_int_neg (p1);
2604       *mul = double_int_sext (double_int_add (p0, p1), precision);
2605       return true;
2606
2607     case INTEGER_CST:
2608       if (TREE_CODE (bot) != INTEGER_CST)
2609         return false;
2610
2611       p0 = double_int_sext (tree_to_double_int (top), precision);
2612       p1 = double_int_sext (tree_to_double_int (bot), precision);
2613       if (double_int_zero_p (p1))
2614         return false;
2615       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2616                               precision);
2617       return double_int_zero_p (res);
2618
2619     default:
2620       return false;
2621     }
2622 }
2623
2624 /* Sets COMB to CST.  */
2625
2626 static void
2627 aff_combination_const (struct affine_tree_combination *comb, tree type,
2628                        unsigned HOST_WIDE_INT cst)
2629 {
2630   unsigned prec = TYPE_PRECISION (type);
2631
2632   comb->type = type;
2633   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2634
2635   comb->n = 0;
2636   comb->rest = NULL_TREE;
2637   comb->offset = cst & comb->mask;
2638 }
2639
2640 /* Sets COMB to single element ELT.  */
2641
2642 static void
2643 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2644 {
2645   unsigned prec = TYPE_PRECISION (type);
2646
2647   comb->type = type;
2648   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2649
2650   comb->n = 1;
2651   comb->elts[0] = elt;
2652   comb->coefs[0] = 1;
2653   comb->rest = NULL_TREE;
2654   comb->offset = 0;
2655 }
2656
2657 /* Scales COMB by SCALE.  */
2658
2659 static void
2660 aff_combination_scale (struct affine_tree_combination *comb,
2661                        unsigned HOST_WIDE_INT scale)
2662 {
2663   unsigned i, j;
2664
2665   if (scale == 1)
2666     return;
2667
2668   if (scale == 0)
2669     {
2670       aff_combination_const (comb, comb->type, 0);
2671       return;
2672     }
2673
2674   comb->offset = (scale * comb->offset) & comb->mask;
2675   for (i = 0, j = 0; i < comb->n; i++)
2676     {
2677       comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2678       comb->elts[j] = comb->elts[i];
2679       if (comb->coefs[j] != 0)
2680         j++;
2681     }
2682   comb->n = j;
2683
2684   if (comb->rest)
2685     {
2686       if (comb->n < MAX_AFF_ELTS)
2687         {
2688           comb->coefs[comb->n] = scale;
2689           comb->elts[comb->n] = comb->rest;
2690           comb->rest = NULL_TREE;
2691           comb->n++;
2692         }
2693       else
2694         comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2695                                   build_int_cst_type (comb->type, scale));
2696     }
2697 }
2698
2699 /* Adds ELT * SCALE to COMB.  */
2700
2701 static void
2702 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2703                          unsigned HOST_WIDE_INT scale)
2704 {
2705   unsigned i;
2706
2707   if (scale == 0)
2708     return;
2709
2710   for (i = 0; i < comb->n; i++)
2711     if (operand_equal_p (comb->elts[i], elt, 0))
2712       {
2713         comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2714         if (comb->coefs[i])
2715           return;
2716
2717         comb->n--;
2718         comb->coefs[i] = comb->coefs[comb->n];
2719         comb->elts[i] = comb->elts[comb->n];
2720
2721         if (comb->rest)
2722           {
2723             gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2724             comb->coefs[comb->n] = 1;
2725             comb->elts[comb->n] = comb->rest;
2726             comb->rest = NULL_TREE;
2727             comb->n++;
2728           }
2729         return;
2730       }
2731   if (comb->n < MAX_AFF_ELTS)
2732     {
2733       comb->coefs[comb->n] = scale;
2734       comb->elts[comb->n] = elt;
2735       comb->n++;
2736       return;
2737     }
2738
2739   if (scale == 1)
2740     elt = fold_convert (comb->type, elt);
2741   else
2742     elt = fold_build2 (MULT_EXPR, comb->type,
2743                        fold_convert (comb->type, elt),
2744                        build_int_cst_type (comb->type, scale));
2745
2746   if (comb->rest)
2747     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2748   else
2749     comb->rest = elt;
2750 }
2751
2752 /* Adds COMB2 to COMB1.  */
2753
2754 static void
2755 aff_combination_add (struct affine_tree_combination *comb1,
2756                      struct affine_tree_combination *comb2)
2757 {
2758   unsigned i;
2759
2760   comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2761   for (i = 0; i < comb2->n; i++)
2762     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2763   if (comb2->rest)
2764     aff_combination_add_elt (comb1, comb2->rest, 1);
2765 }
2766
2767 /* Convert COMB to TYPE.  */
2768
2769 static void
2770 aff_combination_convert (tree type, struct affine_tree_combination *comb)
2771 {
2772   unsigned prec = TYPE_PRECISION (type);
2773   unsigned i;
2774
2775   /* If the precision of both types is the same, it suffices to change the type
2776      of the whole combination -- the elements are allowed to have another type
2777      equivalent wrto STRIP_NOPS.  */
2778   if (prec == TYPE_PRECISION (comb->type))
2779     {
2780       comb->type = type;
2781       return;
2782     }
2783
2784   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2785   comb->offset = comb->offset & comb->mask;
2786
2787   /* The type of the elements can be different from comb->type only as
2788      much as what STRIP_NOPS would remove.  We can just directly cast
2789      to TYPE.  */
2790   for (i = 0; i < comb->n; i++)
2791     comb->elts[i] = fold_convert (type, comb->elts[i]);
2792   if (comb->rest)
2793     comb->rest = fold_convert (type, comb->rest);
2794
2795   comb->type = type;
2796 }
2797
2798 /* Splits EXPR into an affine combination of parts.  */
2799
2800 static void
2801 tree_to_aff_combination (tree expr, tree type,
2802                          struct affine_tree_combination *comb)
2803 {
2804   struct affine_tree_combination tmp;
2805   enum tree_code code;
2806   tree cst, core, toffset;
2807   HOST_WIDE_INT bitpos, bitsize;
2808   enum machine_mode mode;
2809   int unsignedp, volatilep;
2810
2811   STRIP_NOPS (expr);
2812
2813   code = TREE_CODE (expr);
2814   switch (code)
2815     {
2816     case INTEGER_CST:
2817       aff_combination_const (comb, type, int_cst_value (expr));
2818       return;
2819
2820     case PLUS_EXPR:
2821     case MINUS_EXPR:
2822       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2823       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2824       if (code == MINUS_EXPR)
2825         aff_combination_scale (&tmp, -1);
2826       aff_combination_add (comb, &tmp);
2827       return;
2828
2829     case MULT_EXPR:
2830       cst = TREE_OPERAND (expr, 1);
2831       if (TREE_CODE (cst) != INTEGER_CST)
2832         break;
2833       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2834       aff_combination_scale (comb, int_cst_value (cst));
2835       return;
2836
2837     case NEGATE_EXPR:
2838       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2839       aff_combination_scale (comb, -1);
2840       return;
2841
2842     case ADDR_EXPR:
2843       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2844                                   &toffset, &mode, &unsignedp, &volatilep,
2845                                   false);
2846       if (bitpos % BITS_PER_UNIT != 0)
2847         break;
2848       aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2849       core = build_fold_addr_expr (core);
2850       if (TREE_CODE (core) == ADDR_EXPR)
2851         aff_combination_add_elt (comb, core, 1);
2852       else
2853         {
2854           tree_to_aff_combination (core, type, &tmp);
2855           aff_combination_add (comb, &tmp);
2856         }
2857       if (toffset)
2858         {
2859           tree_to_aff_combination (toffset, type, &tmp);
2860           aff_combination_add (comb, &tmp);
2861         }
2862       return;
2863
2864     default:
2865       break;
2866     }
2867
2868   aff_combination_elt (comb, type, expr);
2869 }
2870
2871 /* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
2872
2873 static tree
2874 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2875                  unsigned HOST_WIDE_INT mask)
2876 {
2877   enum tree_code code;
2878
2879   scale &= mask;
2880   elt = fold_convert (type, elt);
2881
2882   if (scale == 1)
2883     {
2884       if (!expr)
2885         return elt;
2886
2887       return fold_build2 (PLUS_EXPR, type, expr, elt);
2888     }
2889
2890   if (scale == mask)
2891     {
2892       if (!expr)
2893         return fold_build1 (NEGATE_EXPR, type, elt);
2894
2895       return fold_build2 (MINUS_EXPR, type, expr, elt);
2896     }
2897
2898   if (!expr)
2899     return fold_build2 (MULT_EXPR, type, elt,
2900                         build_int_cst_type (type, scale));
2901
2902   if ((scale | (mask >> 1)) == mask)
2903     {
2904       /* Scale is negative.  */
2905       code = MINUS_EXPR;
2906       scale = (-scale) & mask;
2907     }
2908   else
2909     code = PLUS_EXPR;
2910
2911   elt = fold_build2 (MULT_EXPR, type, elt,
2912                      build_int_cst_type (type, scale));
2913   return fold_build2 (code, type, expr, elt);
2914 }
2915
2916 /* Copies the tree elements of COMB to ensure that they are not shared.  */
2917
2918 static void
2919 unshare_aff_combination (struct affine_tree_combination *comb)
2920 {
2921   unsigned i;
2922
2923   for (i = 0; i < comb->n; i++)
2924     comb->elts[i] = unshare_expr (comb->elts[i]);
2925   if (comb->rest)
2926     comb->rest = unshare_expr (comb->rest);
2927 }
2928
2929 /* Makes tree from the affine combination COMB.  */
2930
2931 static tree
2932 aff_combination_to_tree (struct affine_tree_combination *comb)
2933 {
2934   tree type = comb->type;
2935   tree expr = comb->rest;
2936   unsigned i;
2937   unsigned HOST_WIDE_INT off, sgn;
2938
2939   if (comb->n == 0 && comb->offset == 0)
2940     {
2941       if (expr)
2942         {
2943           /* Handle the special case produced by get_computation_aff when
2944              the type does not fit in HOST_WIDE_INT.  */
2945           return fold_convert (type, expr);
2946         }
2947       else
2948         return build_int_cst (type, 0);
2949     }
2950
2951   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2952
2953   for (i = 0; i < comb->n; i++)
2954     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2955                             comb->mask);
2956
2957   if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2958     {
2959       /* Offset is negative.  */
2960       off = (-comb->offset) & comb->mask;
2961       sgn = comb->mask;
2962     }
2963   else
2964     {
2965       off = comb->offset;
2966       sgn = 1;
2967     }
2968   return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2969                           comb->mask);
2970 }
2971
2972 /* Folds EXPR using the affine expressions framework.  */
2973
2974 static tree
2975 fold_affine_expr (tree expr)
2976 {
2977   tree type = TREE_TYPE (expr);
2978   struct affine_tree_combination comb;
2979
2980   if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2981     return expr;
2982
2983   tree_to_aff_combination (expr, type, &comb);
2984   return aff_combination_to_tree (&comb);
2985 }
2986
2987 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2988    same precision that is at least as wide as the precision of TYPE, stores
2989    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2990    type of A and B.  */
2991
2992 static tree
2993 determine_common_wider_type (tree *a, tree *b)
2994 {
2995   tree wider_type = NULL;
2996   tree suba, subb;
2997   tree atype = TREE_TYPE (*a);
2998
2999   if ((TREE_CODE (*a) == NOP_EXPR
3000        || TREE_CODE (*a) == CONVERT_EXPR))
3001     {
3002       suba = TREE_OPERAND (*a, 0);
3003       wider_type = TREE_TYPE (suba);
3004       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3005         return atype;
3006     }
3007   else
3008     return atype;
3009
3010   if ((TREE_CODE (*b) == NOP_EXPR
3011        || TREE_CODE (*b) == CONVERT_EXPR))
3012     {
3013       subb = TREE_OPERAND (*b, 0);
3014       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3015         return atype;
3016     }
3017   else
3018     return atype;
3019
3020   *a = suba;
3021   *b = subb;
3022   return wider_type;
3023 }
3024
3025 /* Determines the expression by that USE is expressed from induction variable
3026    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3027    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3028
3029 static bool
3030 get_computation_aff (struct loop *loop,
3031                      struct iv_use *use, struct iv_cand *cand, tree at,
3032                      struct affine_tree_combination *aff)
3033 {
3034   tree ubase = use->iv->base;
3035   tree ustep = use->iv->step;
3036   tree cbase = cand->iv->base;
3037   tree cstep = cand->iv->step;
3038   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3039   tree common_type;
3040   tree uutype;
3041   tree expr, delta;
3042   tree ratio;
3043   unsigned HOST_WIDE_INT ustepi, cstepi;
3044   HOST_WIDE_INT ratioi;
3045   struct affine_tree_combination cbase_aff, expr_aff;
3046   tree cstep_orig = cstep, ustep_orig = ustep;
3047   double_int rat;
3048
3049   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3050     {
3051       /* We do not have a precision to express the values of use.  */
3052       return false;
3053     }
3054
3055   expr = var_at_stmt (loop, cand, at);
3056
3057   if (TREE_TYPE (expr) != ctype)
3058     {
3059       /* This may happen with the original ivs.  */
3060       expr = fold_convert (ctype, expr);
3061     }
3062
3063   if (TYPE_UNSIGNED (utype))
3064     uutype = utype;
3065   else
3066     {
3067       uutype = unsigned_type_for (utype);
3068       ubase = fold_convert (uutype, ubase);
3069       ustep = fold_convert (uutype, ustep);
3070     }
3071
3072   if (uutype != ctype)
3073     {
3074       expr = fold_convert (uutype, expr);
3075       cbase = fold_convert (uutype, cbase);
3076       cstep = fold_convert (uutype, cstep);
3077
3078       /* If the conversion is not noop, we must take it into account when
3079          considering the value of the step.  */
3080       if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3081         cstep_orig = cstep;
3082     }
3083
3084   if (cst_and_fits_in_hwi (cstep_orig)
3085       && cst_and_fits_in_hwi (ustep_orig))
3086     {
3087       ustepi = int_cst_value (ustep_orig);
3088       cstepi = int_cst_value (cstep_orig);
3089
3090       if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3091         {
3092           /* TODO maybe consider case when ustep divides cstep and the ratio is
3093              a power of 2 (so that the division is fast to execute)?  We would
3094              need to be much more careful with overflows etc. then.  */
3095           return false;
3096         }
3097
3098       ratio = build_int_cst_type (uutype, ratioi);
3099     }
3100   else
3101     {
3102       if (!constant_multiple_of (ustep_orig, cstep_orig, &rat))
3103         return false;
3104       ratio = double_int_to_tree (uutype, rat);
3105
3106       /* Ratioi is only used to detect special cases when the multiplicative
3107          factor is 1 or -1, so if rat does not fit to HOST_WIDE_INT, we may
3108          set it to 0.  */
3109       if (double_int_fits_in_shwi_p (rat))
3110         ratioi = double_int_to_shwi (rat);
3111       else
3112         ratioi = 0;
3113     }
3114
3115   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3116      type, we achieve better folding by computing their difference in this
3117      wider type, and cast the result to UUTYPE.  We do not need to worry about
3118      overflows, as all the arithmetics will in the end be performed in UUTYPE
3119      anyway.  */
3120   common_type = determine_common_wider_type (&ubase, &cbase);
3121
3122   /* We may need to shift the value if we are after the increment.  */
3123   if (stmt_after_increment (loop, cand, at))
3124     {
3125       if (uutype != common_type)
3126         cstep = fold_convert (common_type, cstep);
3127       cbase = fold_build2 (PLUS_EXPR, common_type, cbase, cstep);
3128     }
3129
3130   /* use = ubase - ratio * cbase + ratio * var.
3131
3132      In general case ubase + ratio * (var - cbase) could be better (one less
3133      multiplication), but often it is possible to eliminate redundant parts
3134      of computations from (ubase - ratio * cbase) term, and if it does not
3135      happen, fold is able to apply the distributive law to obtain this form
3136      anyway.  */
3137
3138   if (TYPE_PRECISION (common_type) > HOST_BITS_PER_WIDE_INT)
3139     {
3140       /* Let's compute in trees and just return the result in AFF.  This case
3141          should not be very common, and fold itself is not that bad either,
3142          so making the aff. functions more complicated to handle this case
3143          is not that urgent.  */
3144       if (ratioi == 1)
3145         {
3146           delta = fold_build2 (MINUS_EXPR, common_type, ubase, cbase);
3147           if (uutype != common_type)
3148             delta = fold_convert (uutype, delta);
3149           expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3150         }
3151       else if (ratioi == -1)
3152         {
3153           delta = fold_build2 (PLUS_EXPR, common_type, ubase, cbase);
3154           if (uutype != common_type)
3155             delta = fold_convert (uutype, delta);
3156           expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3157         }
3158       else
3159         {
3160           delta = fold_build2 (MULT_EXPR, common_type, cbase, ratio);
3161           delta = fold_build2 (MINUS_EXPR, common_type, ubase, delta);
3162           if (uutype != common_type)
3163             delta = fold_convert (uutype, delta);
3164           expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3165           expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3166         }
3167
3168       aff->type = uutype;
3169       aff->n = 0;
3170       aff->offset = 0;
3171       aff->mask = 0;
3172       aff->rest = expr;
3173       return true;
3174     }
3175
3176   /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3177      possible to compute ratioi.  */
3178   gcc_assert (ratioi);
3179
3180   tree_to_aff_combination (ubase, common_type, aff);
3181   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3182   tree_to_aff_combination (expr, uutype, &expr_aff);
3183   aff_combination_scale (&cbase_aff, -ratioi);
3184   aff_combination_scale (&expr_aff, ratioi);
3185   aff_combination_add (aff, &cbase_aff);
3186   if (common_type != uutype)
3187     aff_combination_convert (uutype, aff);
3188   aff_combination_add (aff, &expr_aff);
3189
3190   return true;
3191 }
3192
3193 /* Determines the expression by that USE is expressed from induction variable
3194    CAND at statement AT in LOOP.  The computation is unshared.  */
3195
3196 static tree
3197 get_computation_at (struct loop *loop,
3198                     struct iv_use *use, struct iv_cand *cand, tree at)
3199 {
3200   struct affine_tree_combination aff;
3201   tree type = TREE_TYPE (use->iv->base);
3202
3203   if (!get_computation_aff (loop, use, cand, at, &aff))
3204     return NULL_TREE;
3205   unshare_aff_combination (&aff);
3206   return fold_convert (type, aff_combination_to_tree (&aff));
3207 }
3208
3209 /* Determines the expression by that USE is expressed from induction variable
3210    CAND in LOOP.  The computation is unshared.  */
3211
3212 static tree
3213 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3214 {
3215   return get_computation_at (loop, use, cand, use->stmt);
3216 }
3217
3218 /* Returns cost of addition in MODE.  */
3219
3220 static unsigned
3221 add_cost (enum machine_mode mode)
3222 {
3223   static unsigned costs[NUM_MACHINE_MODES];
3224   rtx seq;
3225   unsigned cost;
3226
3227   if (costs[mode])
3228     return costs[mode];
3229
3230   start_sequence ();
3231   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3232                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3233                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3234                  NULL_RTX);
3235   seq = get_insns ();
3236   end_sequence ();
3237
3238   cost = seq_cost (seq);
3239   if (!cost)
3240     cost = 1;
3241
3242   costs[mode] = cost;
3243
3244   if (dump_file && (dump_flags & TDF_DETAILS))
3245     fprintf (dump_file, "Addition in %s costs %d\n",
3246              GET_MODE_NAME (mode), cost);
3247   return cost;
3248 }
3249
3250 /* Entry in a hashtable of already known costs for multiplication.  */
3251 struct mbc_entry
3252 {
3253   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3254   enum machine_mode mode;       /* In mode.  */
3255   unsigned cost;                /* The cost.  */
3256 };
3257
3258 /* Counts hash value for the ENTRY.  */
3259
3260 static hashval_t
3261 mbc_entry_hash (const void *entry)
3262 {
3263   const struct mbc_entry *e = entry;
3264
3265   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3266 }
3267
3268 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3269
3270 static int
3271 mbc_entry_eq (const void *entry1, const void *entry2)
3272 {
3273   const struct mbc_entry *e1 = entry1;
3274   const struct mbc_entry *e2 = entry2;
3275
3276   return (e1->mode == e2->mode
3277           && e1->cst == e2->cst);
3278 }
3279
3280 /* Returns cost of multiplication by constant CST in MODE.  */
3281
3282 unsigned
3283 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3284 {
3285   static htab_t costs;
3286   struct mbc_entry **cached, act;
3287   rtx seq;
3288   unsigned cost;
3289
3290   if (!costs)
3291     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3292
3293   act.mode = mode;
3294   act.cst = cst;
3295   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3296   if (*cached)
3297     return (*cached)->cost;
3298
3299   *cached = XNEW (struct mbc_entry);
3300   (*cached)->mode = mode;
3301   (*cached)->cst = cst;
3302
3303   start_sequence ();
3304   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3305                gen_int_mode (cst, mode), NULL_RTX, 0);
3306   seq = get_insns ();
3307   end_sequence ();
3308
3309   cost = seq_cost (seq);
3310
3311   if (dump_file && (dump_flags & TDF_DETAILS))
3312     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3313              (int) cst, GET_MODE_NAME (mode), cost);
3314
3315   (*cached)->cost = cost;
3316
3317   return cost;
3318 }
3319
3320 /* Returns true if multiplying by RATIO is allowed in address.  */
3321
3322 bool
3323 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3324 {
3325 #define MAX_RATIO 128
3326   static sbitmap valid_mult;
3327
3328   if (!valid_mult)
3329     {
3330       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3331       rtx addr;
3332       HOST_WIDE_INT i;
3333
3334       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3335       sbitmap_zero (valid_mult);
3336       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3337       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3338         {
3339           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3340           if (memory_address_p (Pmode, addr))
3341             SET_BIT (valid_mult, i + MAX_RATIO);
3342         }
3343
3344       if (dump_file && (dump_flags & TDF_DETAILS))
3345         {
3346           fprintf (dump_file, "  allowed multipliers:");
3347           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3348             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3349               fprintf (dump_file, " %d", (int) i);
3350           fprintf (dump_file, "\n");
3351           fprintf (dump_file, "\n");
3352         }
3353     }
3354
3355   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3356     return false;
3357
3358   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3359 }
3360
3361 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3362    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3363    variable is omitted.  The created memory accesses MODE.
3364
3365    TODO -- there must be some better way.  This all is quite crude.  */
3366
3367 static unsigned
3368 get_address_cost (bool symbol_present, bool var_present,
3369                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3370 {
3371   static bool initialized = false;
3372   static HOST_WIDE_INT rat, off;
3373   static HOST_WIDE_INT min_offset, max_offset;
3374   static unsigned costs[2][2][2][2];
3375   unsigned cost, acost;
3376   bool offset_p, ratio_p;
3377   HOST_WIDE_INT s_offset;
3378   unsigned HOST_WIDE_INT mask;
3379   unsigned bits;
3380
3381   if (!initialized)
3382     {
3383       HOST_WIDE_INT i;
3384       int old_cse_not_expected;
3385       unsigned sym_p, var_p, off_p, rat_p, add_c;
3386       rtx seq, addr, base;
3387       rtx reg0, reg1;
3388
3389       initialized = true;
3390
3391       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3392
3393       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3394       for (i = 1; i <= 1 << 20; i <<= 1)
3395         {
3396           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3397           if (!memory_address_p (Pmode, addr))
3398             break;
3399         }
3400       max_offset = i >> 1;
3401       off = max_offset;
3402
3403       for (i = 1; i <= 1 << 20; i <<= 1)
3404         {
3405           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3406           if (!memory_address_p (Pmode, addr))
3407             break;
3408         }
3409       min_offset = -(i >> 1);
3410
3411       if (dump_file && (dump_flags & TDF_DETAILS))
3412         {
3413           fprintf (dump_file, "get_address_cost:\n");
3414           fprintf (dump_file, "  min offset %d\n", (int) min_offset);
3415           fprintf (dump_file, "  max offset %d\n", (int) max_offset);
3416         }
3417
3418       rat = 1;
3419       for (i = 2; i <= MAX_RATIO; i++)
3420         if (multiplier_allowed_in_address_p (i))
3421           {
3422             rat = i;
3423             break;
3424           }
3425
3426       /* Compute the cost of various addressing modes.  */
3427       acost = 0;
3428       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3429       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3430
3431       for (i = 0; i < 16; i++)
3432         {
3433           sym_p = i & 1;
3434           var_p = (i >> 1) & 1;
3435           off_p = (i >> 2) & 1;
3436           rat_p = (i >> 3) & 1;
3437
3438           addr = reg0;
3439           if (rat_p)
3440             addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3441
3442           if (var_p)
3443             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3444
3445           if (sym_p)
3446             {
3447               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3448               if (off_p)
3449                 base = gen_rtx_fmt_e (CONST, Pmode,
3450                                       gen_rtx_fmt_ee (PLUS, Pmode,
3451                                                       base,
3452                                                       gen_int_mode (off, Pmode)));
3453             }
3454           else if (off_p)
3455             base = gen_int_mode (off, Pmode);
3456           else
3457             base = NULL_RTX;
3458
3459           if (base)
3460             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3461
3462           start_sequence ();
3463           /* To avoid splitting addressing modes, pretend that no cse will
3464              follow.  */
3465           old_cse_not_expected = cse_not_expected;
3466           cse_not_expected = true;
3467           addr = memory_address (Pmode, addr);
3468           cse_not_expected = old_cse_not_expected;
3469           seq = get_insns ();
3470           end_sequence ();
3471
3472           acost = seq_cost (seq);
3473           acost += address_cost (addr, Pmode);
3474
3475           if (!acost)
3476             acost = 1;
3477           costs[sym_p][var_p][off_p][rat_p] = acost;
3478         }
3479
3480       /* On some targets, it is quite expensive to load symbol to a register,
3481          which makes addresses that contain symbols look much more expensive.
3482          However, the symbol will have to be loaded in any case before the
3483          loop (and quite likely we have it in register already), so it does not
3484          make much sense to penalize them too heavily.  So make some final
3485          tweaks for the SYMBOL_PRESENT modes:
3486
3487          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3488          var is cheaper, use this mode with small penalty.
3489          If VAR_PRESENT is true, try whether the mode with
3490          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3491          if this is the case, use it.  */
3492       add_c = add_cost (Pmode);
3493       for (i = 0; i < 8; i++)
3494         {
3495           var_p = i & 1;
3496           off_p = (i >> 1) & 1;
3497           rat_p = (i >> 2) & 1;
3498
3499           acost = costs[0][1][off_p][rat_p] + 1;
3500           if (var_p)
3501             acost += add_c;
3502
3503           if (acost < costs[1][var_p][off_p][rat_p])
3504             costs[1][var_p][off_p][rat_p] = acost;
3505         }
3506
3507       if (dump_file && (dump_flags & TDF_DETAILS))
3508         {
3509           fprintf (dump_file, "Address costs:\n");
3510
3511           for (i = 0; i < 16; i++)
3512             {
3513               sym_p = i & 1;
3514               var_p = (i >> 1) & 1;
3515               off_p = (i >> 2) & 1;
3516               rat_p = (i >> 3) & 1;
3517
3518               fprintf (dump_file, "  ");
3519               if (sym_p)
3520                 fprintf (dump_file, "sym + ");
3521               if (var_p)
3522                 fprintf (dump_file, "var + ");
3523               if (off_p)
3524                 fprintf (dump_file, "cst + ");
3525               if (rat_p)
3526                 fprintf (dump_file, "rat * ");
3527
3528               acost = costs[sym_p][var_p][off_p][rat_p];
3529               fprintf (dump_file, "index costs %d\n", acost);
3530             }
3531           fprintf (dump_file, "\n");
3532         }
3533     }
3534
3535   bits = GET_MODE_BITSIZE (Pmode);
3536   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3537   offset &= mask;
3538   if ((offset >> (bits - 1) & 1))
3539     offset |= ~mask;
3540   s_offset = offset;
3541
3542   cost = 0;
3543   offset_p = (s_offset != 0
3544               && min_offset <= s_offset && s_offset <= max_offset);
3545   ratio_p = (ratio != 1
3546              && multiplier_allowed_in_address_p (ratio));
3547
3548   if (ratio != 1 && !ratio_p)
3549     cost += multiply_by_cost (ratio, Pmode);
3550
3551   if (s_offset && !offset_p && !symbol_present)
3552     {
3553       cost += add_cost (Pmode);
3554       var_present = true;
3555     }
3556
3557   acost = costs[symbol_present][var_present][offset_p][ratio_p];
3558   return cost + acost;
3559 }
3560
3561 /* Estimates cost of forcing expression EXPR into a variable.  */
3562
3563 unsigned
3564 force_expr_to_var_cost (tree expr)
3565 {
3566   static bool costs_initialized = false;
3567   static unsigned integer_cost;
3568   static unsigned symbol_cost;
3569   static unsigned address_cost;
3570   tree op0, op1;
3571   unsigned cost0, cost1, cost;
3572   enum machine_mode mode;
3573
3574   if (!costs_initialized)
3575     {
3576       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3577       rtx x = gen_rtx_MEM (DECL_MODE (var),
3578                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3579       tree addr;
3580       tree type = build_pointer_type (integer_type_node);
3581
3582       integer_cost = computation_cost (build_int_cst (integer_type_node,
3583                                                       2000));
3584
3585       SET_DECL_RTL (var, x);
3586       TREE_STATIC (var) = 1;
3587       addr = build1 (ADDR_EXPR, type, var);
3588       symbol_cost = computation_cost (addr) + 1;
3589
3590       address_cost
3591         = computation_cost (build2 (PLUS_EXPR, type,
3592                                     addr,
3593                                     build_int_cst (type, 2000))) + 1;
3594       if (dump_file && (dump_flags & TDF_DETAILS))
3595         {
3596           fprintf (dump_file, "force_expr_to_var_cost:\n");
3597           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3598           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3599           fprintf (dump_file, "  address %d\n", (int) address_cost);
3600           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3601           fprintf (dump_file, "\n");
3602         }
3603
3604       costs_initialized = true;
3605     }
3606
3607   STRIP_NOPS (expr);
3608
3609   if (SSA_VAR_P (expr))
3610     return 0;
3611
3612   if (TREE_INVARIANT (expr))
3613     {
3614       if (TREE_CODE (expr) == INTEGER_CST)
3615         return integer_cost;
3616
3617       if (TREE_CODE (expr) == ADDR_EXPR)
3618         {
3619           tree obj = TREE_OPERAND (expr, 0);
3620
3621           if (TREE_CODE (obj) == VAR_DECL
3622               || TREE_CODE (obj) == PARM_DECL
3623               || TREE_CODE (obj) == RESULT_DECL)
3624             return symbol_cost;
3625         }
3626
3627       return address_cost;
3628     }
3629
3630   switch (TREE_CODE (expr))
3631     {
3632     case PLUS_EXPR:
3633     case MINUS_EXPR:
3634     case MULT_EXPR:
3635       op0 = TREE_OPERAND (expr, 0);
3636       op1 = TREE_OPERAND (expr, 1);
3637       STRIP_NOPS (op0);
3638       STRIP_NOPS (op1);
3639
3640       if (is_gimple_val (op0))
3641         cost0 = 0;
3642       else
3643         cost0 = force_expr_to_var_cost (op0);
3644
3645       if (is_gimple_val (op1))
3646         cost1 = 0;
3647       else
3648         cost1 = force_expr_to_var_cost (op1);
3649
3650       break;
3651
3652     default:
3653       /* Just an arbitrary value, FIXME.  */
3654       return target_spill_cost;
3655     }
3656
3657   mode = TYPE_MODE (TREE_TYPE (expr));
3658   switch (TREE_CODE (expr))
3659     {
3660     case PLUS_EXPR:
3661     case MINUS_EXPR:
3662       cost = add_cost (mode);
3663       break;
3664
3665     case MULT_EXPR:
3666       if (cst_and_fits_in_hwi (op0))
3667         cost = multiply_by_cost (int_cst_value (op0), mode);
3668       else if (cst_and_fits_in_hwi (op1))
3669         cost = multiply_by_cost (int_cst_value (op1), mode);
3670       else
3671         return target_spill_cost;
3672       break;
3673
3674     default:
3675       gcc_unreachable ();
3676     }
3677
3678   cost += cost0;
3679   cost += cost1;
3680
3681   /* Bound the cost by target_spill_cost.  The parts of complicated
3682      computations often are either loop invariant or at least can
3683      be shared between several iv uses, so letting this grow without
3684      limits would not give reasonable results.  */
3685   return cost < target_spill_cost ? cost : target_spill_cost;
3686 }
3687
3688 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3689    invariants the computation depends on.  */
3690
3691 static unsigned
3692 force_var_cost (struct ivopts_data *data,
3693                 tree expr, bitmap *depends_on)
3694 {
3695   if (depends_on)
3696     {
3697       fd_ivopts_data = data;
3698       walk_tree (&expr, find_depends, depends_on, NULL);
3699     }
3700
3701   return force_expr_to_var_cost (expr);
3702 }
3703
3704 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3705    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3706    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3707    invariants the computation depends on.  */
3708
3709 static unsigned
3710 split_address_cost (struct ivopts_data *data,
3711                     tree addr, bool *symbol_present, bool *var_present,
3712                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3713 {
3714   tree core;
3715   HOST_WIDE_INT bitsize;
3716   HOST_WIDE_INT bitpos;
3717   tree toffset;
3718   enum machine_mode mode;
3719   int unsignedp, volatilep;
3720
3721   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3722                               &unsignedp, &volatilep, false);
3723
3724   if (toffset != 0
3725       || bitpos % BITS_PER_UNIT != 0
3726       || TREE_CODE (core) != VAR_DECL)
3727     {
3728       *symbol_present = false;
3729       *var_present = true;
3730       fd_ivopts_data = data;
3731       walk_tree (&addr, find_depends, depends_on, NULL);
3732       return target_spill_cost;
3733     }
3734
3735   *offset += bitpos / BITS_PER_UNIT;
3736   if (TREE_STATIC (core)
3737       || DECL_EXTERNAL (core))
3738     {
3739       *symbol_present = true;
3740       *var_present = false;
3741       return 0;
3742     }
3743
3744   *symbol_present = false;
3745   *var_present = true;
3746   return 0;
3747 }
3748
3749 /* Estimates cost of expressing difference of addresses E1 - E2 as
3750    var + symbol + offset.  The value of offset is added to OFFSET,
3751    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3752    part is missing.  DEPENDS_ON is a set of the invariants the computation
3753    depends on.  */
3754
3755 static unsigned
3756 ptr_difference_cost (struct ivopts_data *data,
3757                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3758                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3759 {
3760   HOST_WIDE_INT diff = 0;
3761   unsigned cost;
3762
3763   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3764
3765   if (ptr_difference_const (e1, e2, &diff))
3766     {
3767       *offset += diff;
3768       *symbol_present = false;
3769       *var_present = false;
3770       return 0;
3771     }
3772
3773   if (e2 == integer_zero_node)
3774     return split_address_cost (data, TREE_OPERAND (e1, 0),
3775                                symbol_present, var_present, offset, depends_on);
3776
3777   *symbol_present = false;
3778   *var_present = true;
3779
3780   cost = force_var_cost (data, e1, depends_on);
3781   cost += force_var_cost (data, e2, depends_on);
3782   cost += add_cost (Pmode);
3783
3784   return cost;
3785 }
3786
3787 /* Estimates cost of expressing difference E1 - E2 as
3788    var + symbol + offset.  The value of offset is added to OFFSET,
3789    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3790    part is missing.  DEPENDS_ON is a set of the invariants the computation
3791    depends on.  */
3792
3793 static unsigned
3794 difference_cost (struct ivopts_data *data,
3795                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3796                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3797 {
3798   unsigned cost;
3799   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3800   unsigned HOST_WIDE_INT off1, off2;
3801
3802   e1 = strip_offset (e1, &off1);
3803   e2 = strip_offset (e2, &off2);
3804   *offset += off1 - off2;
3805
3806   STRIP_NOPS (e1);
3807   STRIP_NOPS (e2);
3808
3809   if (TREE_CODE (e1) == ADDR_EXPR)
3810     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3811                                 depends_on);
3812   *symbol_present = false;
3813
3814   if (operand_equal_p (e1, e2, 0))
3815     {
3816       *var_present = false;
3817       return 0;
3818     }
3819   *var_present = true;
3820   if (zero_p (e2))
3821     return force_var_cost (data, e1, depends_on);
3822
3823   if (zero_p (e1))
3824     {
3825       cost = force_var_cost (data, e2, depends_on);
3826       cost += multiply_by_cost (-1, mode);
3827
3828       return cost;
3829     }
3830
3831   cost = force_var_cost (data, e1, depends_on);
3832   cost += force_var_cost (data, e2, depends_on);
3833   cost += add_cost (mode);
3834
3835   return cost;
3836 }
3837
3838 /* Determines the cost of the computation by that USE is expressed
3839    from induction variable CAND.  If ADDRESS_P is true, we just need
3840    to create an address from it, otherwise we want to get it into
3841    register.  A set of invariants we depend on is stored in
3842    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3843
3844 static unsigned
3845 get_computation_cost_at (struct ivopts_data *data,
3846                          struct iv_use *use, struct iv_cand *cand,
3847                          bool address_p, bitmap *depends_on, tree at)
3848 {
3849   tree ubase = use->iv->base, ustep = use->iv->step;
3850   tree cbase, cstep;
3851   tree utype = TREE_TYPE (ubase), ctype;
3852   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3853   HOST_WIDE_INT ratio, aratio;
3854   bool var_present, symbol_present;
3855   unsigned cost = 0, n_sums;
3856
3857   *depends_on = NULL;
3858
3859   /* Only consider real candidates.  */
3860   if (!cand->iv)
3861     return INFTY;
3862
3863   cbase = cand->iv->base;
3864   cstep = cand->iv->step;
3865   ctype = TREE_TYPE (cbase);
3866
3867   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3868     {
3869       /* We do not have a precision to express the values of use.  */
3870       return INFTY;
3871     }
3872
3873   if (address_p)
3874     {
3875       /* Do not try to express address of an object with computation based
3876          on address of a different object.  This may cause problems in rtl
3877          level alias analysis (that does not expect this to be happening,
3878          as this is illegal in C), and would be unlikely to be useful
3879          anyway.  */
3880       if (use->iv->base_object
3881           && cand->iv->base_object
3882           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3883         return INFTY;
3884     }
3885
3886   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3887     {
3888       /* TODO -- add direct handling of this case.  */
3889       goto fallback;
3890     }
3891
3892   /* CSTEPI is removed from the offset in case statement is after the
3893      increment.  If the step is not constant, we use zero instead.
3894      This is a bit imprecise (there is the extra addition), but
3895      redundancy elimination is likely to transform the code so that
3896      it uses value of the variable before increment anyway,
3897      so it is not that much unrealistic.  */
3898   if (cst_and_fits_in_hwi (cstep))
3899     cstepi = int_cst_value (cstep);
3900   else
3901     cstepi = 0;
3902
3903   if (cst_and_fits_in_hwi (ustep)
3904       && cst_and_fits_in_hwi (cstep))
3905     {
3906       ustepi = int_cst_value (ustep);
3907
3908       if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3909         return INFTY;
3910     }
3911   else
3912     {
3913       double_int rat;
3914
3915       if (!constant_multiple_of (ustep, cstep, &rat))
3916         return INFTY;
3917
3918       if (double_int_fits_in_shwi_p (rat))
3919         ratio = double_int_to_shwi (rat);
3920       else
3921         return INFTY;
3922     }
3923
3924   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3925      or ratio == 1, it is better to handle this like
3926
3927      ubase - ratio * cbase + ratio * var
3928
3929      (also holds in the case ratio == -1, TODO.  */
3930
3931   if (cst_and_fits_in_hwi (cbase))
3932     {
3933       offset = - ratio * int_cst_value (cbase);
3934       cost += difference_cost (data,
3935                                ubase, integer_zero_node,
3936                                &symbol_present, &var_present, &offset,
3937                                depends_on);
3938     }
3939   else if (ratio == 1)
3940     {
3941       cost += difference_cost (data,
3942                                ubase, cbase,
3943                                &symbol_present, &var_present, &offset,
3944                                depends_on);
3945     }
3946   else
3947     {
3948       cost += force_var_cost (data, cbase, depends_on);
3949       cost += add_cost (TYPE_MODE (ctype));
3950       cost += difference_cost (data,
3951                                ubase, integer_zero_node,
3952                                &symbol_present, &var_present, &offset,
3953                                depends_on);
3954     }
3955
3956   /* If we are after the increment, the value of the candidate is higher by
3957      one iteration.  */
3958   if (stmt_after_increment (data->current_loop, cand, at))
3959     offset -= ratio * cstepi;
3960
3961   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3962      (symbol/var/const parts may be omitted).  If we are looking for an address,
3963      find the cost of addressing this.  */
3964   if (address_p)
3965     return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3966
3967   /* Otherwise estimate the costs for computing the expression.  */
3968   aratio = ratio > 0 ? ratio : -ratio;
3969   if (!symbol_present && !var_present && !offset)
3970     {
3971       if (ratio != 1)
3972         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3973
3974       return cost;
3975     }
3976
3977   if (aratio != 1)
3978     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3979
3980   n_sums = 1;
3981   if (var_present
3982       /* Symbol + offset should be compile-time computable.  */
3983       && (symbol_present || offset))
3984     n_sums++;
3985
3986   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3987
3988 fallback:
3989   {
3990     /* Just get the expression, expand it and measure the cost.  */
3991     tree comp = get_computation_at (data->current_loop, use, cand, at);
3992
3993     if (!comp)
3994       return INFTY;
3995
3996     if (address_p)
3997       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3998
3999     return computation_cost (comp);
4000   }
4001 }
4002
4003 /* Determines the cost of the computation by that USE is expressed
4004    from induction variable CAND.  If ADDRESS_P is true, we just need
4005    to create an address from it, otherwise we want to get it into
4006    register.  A set of invariants we depend on is stored in
4007    DEPENDS_ON.  */
4008
4009 static unsigned
4010 get_computation_cost (struct ivopts_data *data,
4011                       struct iv_use *use, struct iv_cand *cand,
4012                       bool address_p, bitmap *depends_on)
4013 {
4014   return get_computation_cost_at (data,
4015                                   use, cand, address_p, depends_on, use->stmt);
4016 }
4017
4018 /* Determines cost of basing replacement of USE on CAND in a generic
4019    expression.  */
4020
4021 static bool
4022 determine_use_iv_cost_generic (struct ivopts_data *data,
4023                                struct iv_use *use, struct iv_cand *cand)
4024 {
4025   bitmap depends_on;
4026   unsigned cost;
4027
4028   /* The simple case first -- if we need to express value of the preserved
4029      original biv, the cost is 0.  This also prevents us from counting the
4030      cost of increment twice -- once at this use and once in the cost of
4031      the candidate.  */
4032   if (cand->pos == IP_ORIGINAL
4033       && cand->incremented_at == use->stmt)
4034     {
4035       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4036       return true;
4037     }
4038
4039   cost = get_computation_cost (data, use, cand, false, &depends_on);
4040   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4041
4042   return cost != INFTY;
4043 }
4044
4045 /* Determines cost of basing replacement of USE on CAND in an address.  */
4046
4047 static bool
4048 determine_use_iv_cost_address (struct ivopts_data *data,
4049                                struct iv_use *use, struct iv_cand *cand)
4050 {
4051   bitmap depends_on;
4052   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
4053
4054   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4055
4056   return cost != INFTY;
4057 }
4058
4059 /* Computes value of induction variable IV in iteration NITER.  */
4060
4061 static tree
4062 iv_value (struct iv *iv, tree niter)
4063 {
4064   tree val;
4065   tree type = TREE_TYPE (iv->base);
4066
4067   niter = fold_convert (type, niter);
4068   val = fold_build2 (MULT_EXPR, type, iv->step, niter);
4069
4070   return fold_build2 (PLUS_EXPR, type, iv->base, val);
4071 }
4072
4073 /* Computes value of candidate CAND at position AT in iteration NITER.  */
4074
4075 static tree
4076 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
4077 {
4078   tree val = iv_value (cand->iv, niter);
4079   tree type = TREE_TYPE (cand->iv->base);
4080
4081   if (stmt_after_increment (loop, cand, at))
4082     val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
4083
4084   return val;
4085 }
4086
4087 /* Returns period of induction variable iv.  */
4088
4089 static tree
4090 iv_period (struct iv *iv)
4091 {
4092   tree step = iv->step, period, type;
4093   tree pow2div;
4094
4095   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4096
4097   /* Period of the iv is gcd (step, type range).  Since type range is power
4098      of two, it suffices to determine the maximum power of two that divides
4099      step.  */
4100   pow2div = num_ending_zeros (step);
4101   type = unsigned_type_for (TREE_TYPE (step));
4102
4103   period = build_low_bits_mask (type,
4104                                 (TYPE_PRECISION (type)
4105                                  - tree_low_cst (pow2div, 1)));
4106
4107   return period;
4108 }
4109
4110 /* Returns the comparison operator used when eliminating the iv USE.  */
4111
4112 static enum tree_code
4113 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4114 {
4115   struct loop *loop = data->current_loop;
4116   basic_block ex_bb;
4117   edge exit;
4118
4119   ex_bb = bb_for_stmt (use->stmt);
4120   exit = EDGE_SUCC (ex_bb, 0);
4121   if (flow_bb_inside_loop_p (loop, exit->dest))
4122     exit = EDGE_SUCC (ex_bb, 1);
4123
4124   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4125 }
4126
4127 /* Check whether it is possible to express the condition in USE by comparison
4128    of candidate CAND.  If so, store the value compared with to BOUND.  */
4129
4130 static bool
4131 may_eliminate_iv (struct ivopts_data *data,
4132                   struct iv_use *use, struct iv_cand *cand, tree *bound)
4133 {
4134   basic_block ex_bb;
4135   edge exit;
4136   tree nit, nit_type;
4137   tree wider_type, period, per_type;
4138   struct loop *loop = data->current_loop;
4139
4140   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4141     return false;
4142
4143   /* For now works only for exits that dominate the loop latch.  TODO -- extend
4144      for other conditions inside loop body.  */
4145   ex_bb = bb_for_stmt (use->stmt);
4146   if (use->stmt != last_stmt (ex_bb)
4147       || TREE_CODE (use->stmt) != COND_EXPR)
4148     return false;
4149   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4150     return false;
4151
4152   exit = EDGE_SUCC (ex_bb, 0);
4153   if (flow_bb_inside_loop_p (loop, exit->dest))
4154     exit = EDGE_SUCC (ex_bb, 1);
4155   if (flow_bb_inside_loop_p (loop, exit->dest))
4156     return false;
4157
4158   nit = niter_for_exit (data, exit);
4159   if (!nit)
4160     return false;
4161
4162   nit_type = TREE_TYPE (nit);
4163
4164   /* Determine whether we may use the variable to test whether niter iterations
4165      elapsed.  This is the case iff the period of the induction variable is
4166      greater than the number of iterations.  */
4167   period = iv_period (cand->iv);
4168   if (!period)
4169     return false;
4170   per_type = TREE_TYPE (period);
4171
4172   wider_type = TREE_TYPE (period);
4173   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4174     wider_type = per_type;
4175   else
4176     wider_type = nit_type;
4177
4178   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4179                                       fold_convert (wider_type, period),
4180                                       fold_convert (wider_type, nit))))
4181     return false;
4182
4183   *bound = fold_affine_expr (cand_value_at (loop, cand, use->stmt, nit));
4184   return true;
4185 }
4186
4187 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4188
4189 static bool
4190 determine_use_iv_cost_condition (struct ivopts_data *data,
4191                                  struct iv_use *use, struct iv_cand *cand)
4192 {
4193   tree bound = NULL_TREE, op, cond;
4194   bitmap depends_on = NULL;
4195   unsigned cost;
4196
4197   /* Only consider real candidates.  */
4198   if (!cand->iv)
4199     {
4200       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4201       return false;
4202     }
4203
4204   if (may_eliminate_iv (data, use, cand, &bound))
4205     {
4206       cost = force_var_cost (data, bound, &depends_on);
4207
4208       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4209       return cost != INFTY;
4210     }
4211
4212   /* The induction variable elimination failed; just express the original
4213      giv.  If it is compared with an invariant, note that we cannot get
4214      rid of it.  */
4215   cost = get_computation_cost (data, use, cand, false, &depends_on);
4216
4217   cond = *use->op_p;
4218   if (TREE_CODE (cond) != SSA_NAME)
4219     {
4220       op = TREE_OPERAND (cond, 0);
4221       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4222         op = TREE_OPERAND (cond, 1);
4223       if (TREE_CODE (op) == SSA_NAME)
4224         {
4225           op = get_iv (data, op)->base;
4226           fd_ivopts_data = data;
4227           walk_tree (&op, find_depends, &depends_on, NULL);
4228         }
4229     }
4230
4231   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4232   return cost != INFTY;
4233 }
4234
4235 /* Determines cost of basing replacement of USE on CAND.  Returns false
4236    if USE cannot be based on CAND.  */
4237
4238 static bool
4239 determine_use_iv_cost (struct ivopts_data *data,
4240                        struct iv_use *use, struct iv_cand *cand)
4241 {
4242   switch (use->type)
4243     {
4244     case USE_NONLINEAR_EXPR:
4245       return determine_use_iv_cost_generic (data, use, cand);
4246
4247     case USE_ADDRESS:
4248       return determine_use_iv_cost_address (data, use, cand);
4249
4250     case USE_COMPARE:
4251       return determine_use_iv_cost_condition (data, use, cand);
4252
4253     default:
4254       gcc_unreachable ();
4255     }
4256 }
4257
4258 /* Determines costs of basing the use of the iv on an iv candidate.  */
4259
4260 static void
4261 determine_use_iv_costs (struct ivopts_data *data)
4262 {
4263   unsigned i, j;
4264   struct iv_use *use;
4265   struct iv_cand *cand;
4266   bitmap to_clear = BITMAP_ALLOC (NULL);
4267
4268   alloc_use_cost_map (data);
4269
4270   for (i = 0; i < n_iv_uses (data); i++)
4271     {
4272       use = iv_use (data, i);
4273
4274       if (data->consider_all_candidates)
4275         {
4276           for (j = 0; j < n_iv_cands (data); j++)
4277             {
4278               cand = iv_cand (data, j);
4279               determine_use_iv_cost (data, use, cand);
4280             }
4281         }
4282       else
4283         {
4284           bitmap_iterator bi;
4285
4286           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4287             {
4288               cand = iv_cand (data, j);
4289               if (!determine_use_iv_cost (data, use, cand))
4290                 bitmap_set_bit (to_clear, j);
4291             }
4292
4293           /* Remove the candidates for that the cost is infinite from
4294              the list of related candidates.  */
4295           bitmap_and_compl_into (use->related_cands, to_clear);
4296           bitmap_clear (to_clear);
4297         }
4298     }
4299
4300   BITMAP_FREE (to_clear);
4301
4302   if (dump_file && (dump_flags & TDF_DETAILS))
4303     {
4304       fprintf (dump_file, "Use-candidate costs:\n");
4305
4306       for (i = 0; i < n_iv_uses (data); i++)
4307         {
4308           use = iv_use (data, i);
4309
4310           fprintf (dump_file, "Use %d:\n", i);
4311           fprintf (dump_file, "  cand\tcost\tdepends on\n");
4312           for (j = 0; j < use->n_map_members; j++)
4313             {
4314               if (!use->cost_map[j].cand
4315                   || use->cost_map[j].cost == INFTY)
4316                 continue;
4317
4318               fprintf (dump_file, "  %d\t%d\t",
4319                        use->cost_map[j].cand->id,
4320                        use->cost_map[j].cost);
4321               if (use->cost_map[j].depends_on)
4322                 bitmap_print (dump_file,
4323                               use->cost_map[j].depends_on, "","");
4324               fprintf (dump_file, "\n");
4325             }
4326
4327           fprintf (dump_file, "\n");
4328         }
4329       fprintf (dump_file, "\n");
4330     }
4331 }
4332
4333 /* Determines cost of the candidate CAND.  */
4334
4335 static void
4336 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4337 {
4338   unsigned cost_base, cost_step;
4339   tree base;
4340
4341   if (!cand->iv)
4342     {
4343       cand->cost = 0;
4344       return;
4345     }
4346
4347   /* There are two costs associated with the candidate -- its increment
4348      and its initialization.  The second is almost negligible for any loop
4349      that rolls enough, so we take it just very little into account.  */
4350
4351   base = cand->iv->base;
4352   cost_base = force_var_cost (data, base, NULL);
4353   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4354
4355   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4356
4357   /* Prefer the original iv unless we may gain something by replacing it;
4358      this is not really relevant for artificial ivs created by other
4359      passes.  */
4360   if (cand->pos == IP_ORIGINAL
4361       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4362     cand->cost--;
4363
4364   /* Prefer not to insert statements into latch unless there are some
4365      already (so that we do not create unnecessary jumps).  */
4366   if (cand->pos == IP_END
4367       && empty_block_p (ip_end_pos (data->current_loop)))
4368     cand->cost++;
4369 }
4370
4371 /* Determines costs of computation of the candidates.  */
4372
4373 static void
4374 determine_iv_costs (struct ivopts_data *data)
4375 {
4376   unsigned i;
4377
4378   if (dump_file && (dump_flags & TDF_DETAILS))
4379     {
4380       fprintf (dump_file, "Candidate costs:\n");
4381       fprintf (dump_file, "  cand\tcost\n");
4382     }
4383
4384   for (i = 0; i < n_iv_cands (data); i++)
4385     {
4386       struct iv_cand *cand = iv_cand (data, i);
4387
4388       determine_iv_cost (data, cand);
4389
4390       if (dump_file && (dump_flags & TDF_DETAILS))
4391         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4392     }
4393
4394 if (dump_file && (dump_flags & TDF_DETAILS))
4395       fprintf (dump_file, "\n");
4396 }
4397
4398 /* Calculates cost for having SIZE induction variables.  */
4399
4400 static unsigned
4401 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4402 {
4403   return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
4404 }
4405
4406 /* For each size of the induction variable set determine the penalty.  */
4407
4408 static void
4409 determine_set_costs (struct ivopts_data *data)
4410 {
4411   unsigned j, n;
4412   tree phi, op;
4413   struct loop *loop = data->current_loop;
4414   bitmap_iterator bi;
4415
4416   /* We use the following model (definitely improvable, especially the
4417      cost function -- TODO):
4418
4419      We estimate the number of registers available (using MD data), name it A.
4420
4421      We estimate the number of registers used by the loop, name it U.  This
4422      number is obtained as the number of loop phi nodes (not counting virtual
4423      registers and bivs) + the number of variables from outside of the loop.
4424
4425      We set a reserve R (free regs that are used for temporary computations,
4426      etc.).  For now the reserve is a constant 3.
4427
4428      Let I be the number of induction variables.
4429
4430      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4431         make a lot of ivs without a reason).
4432      -- if A - R < U + I <= A, the cost is I * PRES_COST
4433      -- if U + I > A, the cost is I * PRES_COST and
4434         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4435
4436   if (dump_file && (dump_flags & TDF_DETAILS))
4437     {
4438       fprintf (dump_file, "Global costs:\n");
4439       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4440       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4441       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4442       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4443     }
4444
4445   n = 0;
4446   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4447     {
4448       op = PHI_RESULT (phi);
4449
4450       if (!is_gimple_reg (op))
4451         continue;
4452
4453       if (get_iv (data, op))
4454         continue;
4455
4456       n++;
4457     }
4458
4459   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4460     {
4461       struct version_info *info = ver_info (data, j);
4462
4463       if (info->inv_id && info->has_nonlin_use)
4464         n++;
4465     }
4466
4467   data->regs_used = n;
4468   if (dump_file && (dump_flags & TDF_DETAILS))
4469     fprintf (dump_file, "  regs_used %d\n", n);
4470
4471   if (dump_file && (dump_flags & TDF_DETAILS))
4472     {
4473       fprintf (dump_file, "  cost for size:\n");
4474       fprintf (dump_file, "  ivs\tcost\n");
4475       for (j = 0; j <= 2 * target_avail_regs; j++)
4476         fprintf (dump_file, "  %d\t%d\n", j,
4477                  ivopts_global_cost_for_size (data, j));
4478       fprintf (dump_file, "\n");
4479     }
4480 }
4481
4482 /* Returns true if A is a cheaper cost pair than B.  */
4483
4484 static bool
4485 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4486 {
4487   if (!a)
4488     return false;
4489
4490   if (!b)
4491     return true;
4492
4493   if (a->cost < b->cost)
4494     return true;
4495
4496   if (a->cost > b->cost)
4497     return false;
4498
4499   /* In case the costs are the same, prefer the cheaper candidate.  */
4500   if (a->cand->cost < b->cand->cost)
4501     return true;
4502
4503   return false;
4504 }
4505
4506 /* Computes the cost field of IVS structure.  */
4507
4508 static void
4509 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4510 {
4511   unsigned cost = 0;
4512
4513   cost += ivs->cand_use_cost;
4514   cost += ivs->cand_cost;
4515   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4516
4517   ivs->cost = cost;
4518 }
4519
4520 /* Remove invariants in set INVS to set IVS.  */
4521
4522 static void
4523 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4524 {
4525   bitmap_iterator bi;
4526   unsigned iid;
4527
4528   if (!invs)
4529     return;
4530
4531   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4532     {
4533       ivs->n_invariant_uses[iid]--;
4534       if (ivs->n_invariant_uses[iid] == 0)
4535         ivs->n_regs--;
4536     }
4537 }
4538
4539 /* Set USE not to be expressed by any candidate in IVS.  */
4540
4541 static void
4542 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4543                  struct iv_use *use)
4544 {
4545   unsigned uid = use->id, cid;
4546   struct cost_pair *cp;
4547
4548   cp = ivs->cand_for_use[uid];
4549   if (!cp)
4550     return;
4551   cid = cp->cand->id;
4552
4553   ivs->bad_uses++;
4554   ivs->cand_for_use[uid] = NULL;
4555   ivs->n_cand_uses[cid]--;
4556
4557   if (ivs->n_cand_uses[cid] == 0)
4558     {
4559       bitmap_clear_bit (ivs->cands, cid);
4560       /* Do not count the pseudocandidates.  */
4561       if (cp->cand->iv)
4562         ivs->n_regs--;
4563       ivs->n_cands--;
4564       ivs->cand_cost -= cp->cand->cost;
4565
4566       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4567     }
4568
4569   ivs->cand_use_cost -= cp->cost;
4570
4571   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4572   iv_ca_recount_cost (data, ivs);
4573 }
4574
4575 /* Add invariants in set INVS to set IVS.  */
4576
4577 static void
4578 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4579 {
4580   bitmap_iterator bi;
4581   unsigned iid;
4582
4583   if (!invs)
4584     return;
4585
4586   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4587     {
4588       ivs->n_invariant_uses[iid]++;
4589       if (ivs->n_invariant_uses[iid] == 1)
4590         ivs->n_regs++;
4591     }
4592 }
4593
4594 /* Set cost pair for USE in set IVS to CP.  */
4595
4596 static void
4597 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4598               struct iv_use *use, struct cost_pair *cp)
4599 {
4600   unsigned uid = use->id, cid;
4601
4602   if (ivs->cand_for_use[uid] == cp)
4603     return;
4604
4605   if (ivs->cand_for_use[uid])
4606     iv_ca_set_no_cp (data, ivs, use);
4607
4608   if (cp)
4609     {
4610       cid = cp->cand->id;
4611
4612       ivs->bad_uses--;
4613       ivs->cand_for_use[uid] = cp;
4614       ivs->n_cand_uses[cid]++;
4615       if (ivs->n_cand_uses[cid] == 1)
4616         {
4617           bitmap_set_bit (ivs->cands, cid);
4618           /* Do not count the pseudocandidates.  */
4619           if (cp->cand->iv)
4620             ivs->n_regs++;
4621           ivs->n_cands++;
4622           ivs->cand_cost += cp->cand->cost;
4623
4624           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4625         }
4626
4627       ivs->cand_use_cost += cp->cost;
4628       iv_ca_set_add_invariants (ivs, cp->depends_on);
4629       iv_ca_recount_cost (data, ivs);
4630     }
4631 }
4632
4633 /* Extend set IVS by expressing USE by some of the candidates in it
4634    if possible.  */
4635
4636 static void
4637 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4638                struct iv_use *use)
4639 {
4640   struct cost_pair *best_cp = NULL, *cp;
4641   bitmap_iterator bi;
4642   unsigned i;
4643
4644   gcc_assert (ivs->upto >= use->id);
4645
4646   if (ivs->upto == use->id)
4647     {
4648       ivs->upto++;
4649       ivs->bad_uses++;
4650     }
4651
4652   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4653     {
4654       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4655
4656       if (cheaper_cost_pair (cp, best_cp))
4657         best_cp = cp;
4658     }
4659
4660   iv_ca_set_cp (data, ivs, use, best_cp);
4661 }
4662
4663 /* Get cost for assignment IVS.  */
4664
4665 static unsigned
4666 iv_ca_cost (struct iv_ca *ivs)
4667 {
4668   return (ivs->bad_uses ? INFTY : ivs->cost);
4669 }
4670
4671 /* Returns true if all dependences of CP are among invariants in IVS.  */
4672
4673 static bool
4674 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4675 {
4676   unsigned i;
4677   bitmap_iterator bi;
4678
4679   if (!cp->depends_on)
4680     return true;
4681
4682   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4683     {
4684       if (ivs->n_invariant_uses[i] == 0)
4685         return false;
4686     }
4687
4688   return true;
4689 }
4690
4691 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4692    it before NEXT_CHANGE.  */
4693
4694 static struct iv_ca_delta *
4695 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4696                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4697 {
4698   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4699
4700   change->use = use;
4701   change->old_cp = old_cp;
4702   change->new_cp = new_cp;
4703   change->next_change = next_change;
4704
4705   return change;
4706 }
4707
4708 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4709    are rewritten.  */
4710
4711 static struct iv_ca_delta *
4712 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4713 {
4714   struct iv_ca_delta *last;
4715
4716   if (!l2)
4717     return l1;
4718
4719   if (!l1)
4720     return l2;
4721
4722   for (last = l1; last->next_change; last = last->next_change)
4723     continue;
4724   last->next_change = l2;
4725
4726   return l1;
4727 }
4728
4729 /* Returns candidate by that USE is expressed in IVS.  */
4730
4731 static struct cost_pair *
4732 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4733 {
4734   return ivs->cand_for_use[use->id];
4735 }
4736
4737 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4738
4739 static struct iv_ca_delta *
4740 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4741 {
4742   struct iv_ca_delta *act, *next, *prev = NULL;
4743   struct cost_pair *tmp;
4744
4745   for (act = delta; act; act = next)
4746     {
4747       next = act->next_change;
4748       act->next_change = prev;
4749       prev = act;
4750
4751       tmp = act->old_cp;
4752       act->old_cp = act->new_cp;
4753       act->new_cp = tmp;
4754     }
4755
4756   return prev;
4757 }
4758
4759 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4760    reverted instead.  */
4761
4762 static void
4763 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4764                     struct iv_ca_delta *delta, bool forward)
4765 {
4766   struct cost_pair *from, *to;
4767   struct iv_ca_delta *act;
4768
4769   if (!forward)
4770     delta = iv_ca_delta_reverse (delta);
4771
4772   for (act = delta; act; act = act->next_change)
4773     {
4774       from = act->old_cp;
4775       to = act->new_cp;
4776       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4777       iv_ca_set_cp (data, ivs, act->use, to);
4778     }
4779
4780   if (!forward)
4781     iv_ca_delta_reverse (delta);
4782 }
4783
4784 /* Returns true if CAND is used in IVS.  */
4785
4786 static bool
4787 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4788 {
4789   return ivs->n_cand_uses[cand->id] > 0;
4790 }
4791
4792 /* Returns number of induction variable candidates in the set IVS.  */
4793
4794 static unsigned
4795 iv_ca_n_cands (struct iv_ca *ivs)
4796 {
4797   return ivs->n_cands;
4798 }
4799
4800 /* Free the list of changes DELTA.  */
4801
4802 static void
4803 iv_ca_delta_free (struct iv_ca_delta **delta)
4804 {
4805   struct iv_ca_delta *act, *next;
4806
4807   for (act = *delta; act; act = next)
4808     {
4809       next = act->next_change;
4810       free (act);
4811     }
4812
4813   *delta = NULL;
4814 }
4815
4816 /* Allocates new iv candidates assignment.  */
4817
4818 static struct iv_ca *
4819 iv_ca_new (struct ivopts_data *data)
4820 {
4821   struct iv_ca *nw = XNEW (struct iv_ca);
4822
4823   nw->upto = 0;
4824   nw->bad_uses = 0;
4825   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4826   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4827   nw->cands = BITMAP_ALLOC (NULL);
4828   nw->n_cands = 0;
4829   nw->n_regs = 0;
4830   nw->cand_use_cost = 0;
4831   nw->cand_cost = 0;
4832   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4833   nw->cost = 0;
4834
4835   return nw;
4836 }
4837
4838 /* Free memory occupied by the set IVS.  */
4839
4840 static void
4841 iv_ca_free (struct iv_ca **ivs)
4842 {
4843   free ((*ivs)->cand_for_use);
4844   free ((*ivs)->n_cand_uses);
4845   BITMAP_FREE ((*ivs)->cands);
4846   free ((*ivs)->n_invariant_uses);
4847   free (*ivs);
4848   *ivs = NULL;
4849 }
4850
4851 /* Dumps IVS to FILE.  */
4852
4853 static void
4854 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4855 {
4856   const char *pref = "  invariants ";
4857   unsigned i;
4858
4859   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4860   bitmap_print (file, ivs->cands, "  candidates ","\n");
4861
4862   for (i = 1; i <= data->max_inv_id; i++)
4863     if (ivs->n_invariant_uses[i])
4864       {
4865         fprintf (file, "%s%d", pref, i);
4866         pref = ", ";
4867       }
4868   fprintf (file, "\n");
4869 }
4870
4871 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4872    new set, and store differences in DELTA.  Number of induction variables
4873    in the new set is stored to N_IVS.  */
4874
4875 static unsigned
4876 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4877               struct iv_cand *cand, struct iv_ca_delta **delta,
4878               unsigned *n_ivs)
4879 {
4880   unsigned i, cost;
4881   struct iv_use *use;
4882   struct cost_pair *old_cp, *new_cp;
4883
4884   *delta = NULL;
4885   for (i = 0; i < ivs->upto; i++)
4886     {
4887       use = iv_use (data, i);
4888       old_cp = iv_ca_cand_for_use (ivs, use);
4889
4890       if (old_cp
4891           && old_cp->cand == cand)
4892         continue;
4893
4894       new_cp = get_use_iv_cost (data, use, cand);
4895       if (!new_cp)
4896         continue;
4897
4898       if (!iv_ca_has_deps (ivs, new_cp))
4899         continue;
4900
4901       if (!cheaper_cost_pair (new_cp, old_cp))
4902         continue;
4903
4904       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4905     }
4906
4907   iv_ca_delta_commit (data, ivs, *delta, true);
4908   cost = iv_ca_cost (ivs);
4909   if (n_ivs)
4910     *n_ivs = iv_ca_n_cands (ivs);
4911   iv_ca_delta_commit (data, ivs, *delta, false);
4912
4913   return cost;
4914 }
4915
4916 /* Try narrowing set IVS by removing CAND.  Return the cost of
4917    the new set and store the differences in DELTA.  */
4918
4919 static unsigned
4920 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4921               struct iv_cand *cand, struct iv_ca_delta **delta)
4922 {
4923   unsigned i, ci;
4924   struct iv_use *use;
4925   struct cost_pair *old_cp, *new_cp, *cp;
4926   bitmap_iterator bi;
4927   struct iv_cand *cnd;
4928   unsigned cost;
4929
4930   *delta = NULL;
4931   for (i = 0; i < n_iv_uses (data); i++)
4932     {
4933       use = iv_use (data, i);
4934
4935       old_cp = iv_ca_cand_for_use (ivs, use);
4936       if (old_cp->cand != cand)
4937         continue;
4938
4939       new_cp = NULL;
4940
4941       if (data->consider_all_candidates)
4942         {
4943           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4944             {
4945               if (ci == cand->id)
4946                 continue;
4947
4948               cnd = iv_cand (data, ci);
4949
4950               cp = get_use_iv_cost (data, use, cnd);
4951               if (!cp)
4952                 continue;
4953               if (!iv_ca_has_deps (ivs, cp))
4954                 continue;
4955
4956               if (!cheaper_cost_pair (cp, new_cp))
4957                 continue;
4958
4959               new_cp = cp;
4960             }
4961         }
4962       else
4963         {
4964           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4965             {
4966               if (ci == cand->id)
4967                 continue;
4968
4969               cnd = iv_cand (data, ci);
4970
4971               cp = get_use_iv_cost (data, use, cnd);
4972               if (!cp)
4973                 continue;
4974               if (!iv_ca_has_deps (ivs, cp))
4975                 continue;
4976
4977               if (!cheaper_cost_pair (cp, new_cp))
4978                 continue;
4979
4980               new_cp = cp;
4981             }
4982         }
4983
4984       if (!new_cp)
4985         {
4986           iv_ca_delta_free (delta);
4987           return INFTY;
4988         }
4989
4990       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4991     }
4992
4993   iv_ca_delta_commit (data, ivs, *delta, true);
4994   cost = iv_ca_cost (ivs);
4995   iv_ca_delta_commit (data, ivs, *delta, false);
4996
4997   return cost;
4998 }
4999
5000 /* Try optimizing the set of candidates IVS by removing candidates different
5001    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5002    differences in DELTA.  */
5003
5004 static unsigned
5005 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5006              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5007 {
5008   bitmap_iterator bi;
5009   struct iv_ca_delta *act_delta, *best_delta;
5010   unsigned i, best_cost, acost;
5011   struct iv_cand *cand;
5012
5013   best_delta = NULL;
5014   best_cost = iv_ca_cost (ivs);
5015
5016   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5017     {
5018       cand = iv_cand (data, i);
5019
5020       if (cand == except_cand)
5021         continue;
5022
5023       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5024
5025       if (acost < best_cost)
5026         {
5027           best_cost = acost;
5028           iv_ca_delta_free (&best_delta);
5029           best_delta = act_delta;
5030         }
5031       else
5032         iv_ca_delta_free (&act_delta);
5033     }
5034
5035   if (!best_delta)
5036     {
5037       *delta = NULL;
5038       return best_cost;
5039     }
5040
5041   /* Recurse to possibly remove other unnecessary ivs.  */
5042   iv_ca_delta_commit (data, ivs, best_delta, true);
5043   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5044   iv_ca_delta_commit (data, ivs, best_delta, false);
5045   *delta = iv_ca_delta_join (best_delta, *delta);
5046   return best_cost;
5047 }
5048
5049 /* Tries to extend the sets IVS in the best possible way in order
5050    to express the USE.  */
5051
5052 static bool
5053 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5054                   struct iv_use *use)
5055 {
5056   unsigned best_cost, act_cost;
5057   unsigned i;
5058   bitmap_iterator bi;
5059   struct iv_cand *cand;
5060   struct iv_ca_delta *best_delta = NULL, *act_delta;
5061   struct cost_pair *cp;
5062
5063   iv_ca_add_use (data, ivs, use);
5064   best_cost = iv_ca_cost (ivs);
5065
5066   cp = iv_ca_cand_for_use (ivs, use);
5067   if (cp)
5068     {
5069       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5070       iv_ca_set_no_cp (data, ivs, use);
5071     }
5072
5073   /* First try important candidates.  Only if it fails, try the specific ones.
5074      Rationale -- in loops with many variables the best choice often is to use
5075      just one generic biv.  If we added here many ivs specific to the uses,
5076      the optimization algorithm later would be likely to get stuck in a local
5077      minimum, thus causing us to create too many ivs.  The approach from
5078      few ivs to more seems more likely to be successful -- starting from few
5079      ivs, replacing an expensive use by a specific iv should always be a
5080      win.  */
5081   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5082     {
5083       cand = iv_cand (data, i);
5084
5085       if (iv_ca_cand_used_p (ivs, cand))
5086         continue;
5087
5088       cp = get_use_iv_cost (data, use, cand);
5089       if (!cp)
5090         continue;
5091
5092       iv_ca_set_cp (data, ivs, use, cp);
5093       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5094       iv_ca_set_no_cp (data, ivs, use);
5095       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5096
5097       if (act_cost < best_cost)
5098         {
5099           best_cost = act_cost;
5100
5101           iv_ca_delta_free (&best_delta);
5102           best_delta = act_delta;
5103         }
5104       else
5105         iv_ca_delta_free (&act_delta);
5106     }
5107
5108   if (best_cost == INFTY)
5109     {
5110       for (i = 0; i < use->n_map_members; i++)
5111         {
5112           cp = use->cost_map + i;
5113           cand = cp->cand;
5114           if (!cand)
5115             continue;
5116
5117           /* Already tried this.  */
5118           if (cand->important)
5119             continue;
5120
5121           if (iv_ca_cand_used_p (ivs, cand))
5122             continue;
5123
5124           act_delta = NULL;
5125           iv_ca_set_cp (data, ivs, use, cp);
5126           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5127           iv_ca_set_no_cp (data, ivs, use);
5128           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5129                                        cp, act_delta);
5130
5131           if (act_cost < best_cost)
5132             {
5133               best_cost = act_cost;
5134
5135               if (best_delta)
5136                 iv_ca_delta_free (&best_delta);
5137               best_delta = act_delta;
5138             }
5139           else
5140             iv_ca_delta_free (&act_delta);
5141         }
5142     }
5143
5144   iv_ca_delta_commit (data, ivs, best_delta, true);
5145   iv_ca_delta_free (&best_delta);
5146
5147   return (best_cost != INFTY);
5148 }
5149
5150 /* Finds an initial assignment of candidates to uses.  */
5151
5152 static struct iv_ca *
5153 get_initial_solution (struct ivopts_data *data)
5154 {
5155   struct iv_ca *ivs = iv_ca_new (data);
5156   unsigned i;
5157
5158   for (i = 0; i < n_iv_uses (data); i++)
5159     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5160       {
5161         iv_ca_free (&ivs);
5162         return NULL;
5163       }
5164
5165   return ivs;
5166 }
5167
5168 /* Tries to improve set of induction variables IVS.  */
5169
5170 static bool
5171 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5172 {
5173   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5174   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5175   struct iv_cand *cand;
5176
5177   /* Try extending the set of induction variables by one.  */
5178   for (i = 0; i < n_iv_cands (data); i++)
5179     {
5180       cand = iv_cand (data, i);
5181
5182       if (iv_ca_cand_used_p (ivs, cand))
5183         continue;
5184
5185       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5186       if (!act_delta)
5187         continue;
5188
5189       /* If we successfully added the candidate and the set is small enough,
5190          try optimizing it by removing other candidates.  */
5191       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5192         {
5193           iv_ca_delta_commit (data, ivs, act_delta, true);
5194           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5195           iv_ca_delta_commit (data, ivs, act_delta, false);
5196           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5197         }
5198
5199       if (acost < best_cost)
5200         {
5201           best_cost = acost;
5202           iv_ca_delta_free (&best_delta);
5203           best_delta = act_delta;
5204         }
5205       else
5206         iv_ca_delta_free (&act_delta);
5207     }
5208
5209   if (!best_delta)
5210     {
5211       /* Try removing the candidates from the set instead.  */
5212       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5213
5214       /* Nothing more we can do.  */
5215       if (!best_delta)
5216         return false;
5217     }
5218
5219   iv_ca_delta_commit (data, ivs, best_delta, true);
5220   gcc_assert (best_cost == iv_ca_cost (ivs));
5221   iv_ca_delta_free (&best_delta);
5222   return true;
5223 }
5224
5225 /* Attempts to find the optimal set of induction variables.  We do simple
5226    greedy heuristic -- we try to replace at most one candidate in the selected
5227    solution and remove the unused ivs while this improves the cost.  */
5228
5229 static struct iv_ca *
5230 find_optimal_iv_set (struct ivopts_data *data)
5231 {
5232   unsigned i;
5233   struct iv_ca *set;
5234   struct iv_use *use;
5235
5236   /* Get the initial solution.  */
5237   set = get_initial_solution (data);
5238   if (!set)
5239     {
5240       if (dump_file && (dump_flags & TDF_DETAILS))
5241         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5242       return NULL;
5243     }
5244
5245   if (dump_file && (dump_flags & TDF_DETAILS))
5246     {
5247       fprintf (dump_file, "Initial set of candidates:\n");
5248       iv_ca_dump (data, dump_file, set);
5249     }
5250
5251   while (try_improve_iv_set (data, set))
5252     {
5253       if (dump_file && (dump_flags & TDF_DETAILS))
5254         {
5255           fprintf (dump_file, "Improved to:\n");
5256           iv_ca_dump (data, dump_file, set);
5257         }
5258     }
5259
5260   if (dump_file && (dump_flags & TDF_DETAILS))
5261     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5262
5263   for (i = 0; i < n_iv_uses (data); i++)
5264     {
5265       use = iv_use (data, i);
5266       use->selected = iv_ca_cand_for_use (set, use)->cand;
5267     }
5268
5269   return set;
5270 }
5271
5272 /* Creates a new induction variable corresponding to CAND.  */
5273
5274 static void
5275 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5276 {
5277   block_stmt_iterator incr_pos;
5278   tree base;
5279   bool after = false;
5280
5281   if (!cand->iv)
5282     return;
5283
5284   switch (cand->pos)
5285     {
5286     case IP_NORMAL:
5287       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5288       break;
5289
5290     case IP_END:
5291       incr_pos = bsi_last (ip_end_pos (data->current_loop));
5292       after = true;
5293       break;
5294
5295     case IP_ORIGINAL:
5296       /* Mark that the iv is preserved.  */
5297       name_info (data, cand->var_before)->preserve_biv = true;
5298       name_info (data, cand->var_after)->preserve_biv = true;
5299
5300       /* Rewrite the increment so that it uses var_before directly.  */
5301       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5302
5303       return;
5304     }
5305
5306   gimple_add_tmp_var (cand->var_before);
5307   add_referenced_var (cand->var_before);
5308
5309   base = unshare_expr (cand->iv->base);
5310
5311   create_iv (base, unshare_expr (cand->iv->step),
5312              cand->var_before, data->current_loop,
5313              &incr_pos, after, &cand->var_before, &cand->var_after);
5314 }
5315
5316 /* Creates new induction variables described in SET.  */
5317
5318 static void
5319 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5320 {
5321   unsigned i;
5322   struct iv_cand *cand;
5323   bitmap_iterator bi;
5324
5325   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5326     {
5327       cand = iv_cand (data, i);
5328       create_new_iv (data, cand);
5329     }
5330 }
5331
5332 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5333    is true, remove also the ssa name defined by the statement.  */
5334
5335 static void
5336 remove_statement (tree stmt, bool including_defined_name)
5337 {
5338   if (TREE_CODE (stmt) == PHI_NODE)
5339     {
5340       if (!including_defined_name)
5341         {
5342           /* Prevent the ssa name defined by the statement from being removed.  */
5343           SET_PHI_RESULT (stmt, NULL);
5344         }
5345       remove_phi_node (stmt, NULL_TREE);
5346     }
5347   else
5348     {
5349       block_stmt_iterator bsi = bsi_for_stmt (stmt);
5350
5351       bsi_remove (&bsi, true);
5352     }
5353 }
5354
5355 /* Rewrites USE (definition of iv used in a nonlinear expression)
5356    using candidate CAND.  */
5357
5358 static void
5359 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5360                             struct iv_use *use, struct iv_cand *cand)
5361 {
5362   tree comp;
5363   tree op, stmts, tgt, ass;
5364   block_stmt_iterator bsi, pbsi;
5365
5366   /* An important special case -- if we are asked to express value of
5367      the original iv by itself, just exit; there is no need to
5368      introduce a new computation (that might also need casting the
5369      variable to unsigned and back).  */
5370   if (cand->pos == IP_ORIGINAL
5371       && cand->incremented_at == use->stmt)
5372     {
5373       tree step, ctype, utype;
5374       enum tree_code incr_code = PLUS_EXPR;
5375
5376       gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5377       gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5378
5379       step = cand->iv->step;
5380       ctype = TREE_TYPE (step);
5381       utype = TREE_TYPE (cand->var_after);
5382       if (TREE_CODE (step) == NEGATE_EXPR)
5383         {
5384           incr_code = MINUS_EXPR;
5385           step = TREE_OPERAND (step, 0);
5386         }
5387
5388       /* Check whether we may leave the computation unchanged.
5389          This is the case only if it does not rely on other
5390          computations in the loop -- otherwise, the computation
5391          we rely upon may be removed in remove_unused_ivs,
5392          thus leading to ICE.  */
5393       op = TREE_OPERAND (use->stmt, 1);
5394       if (TREE_CODE (op) == PLUS_EXPR
5395           || TREE_CODE (op) == MINUS_EXPR)
5396         {
5397           if (TREE_OPERAND (op, 0) == cand->var_before)
5398             op = TREE_OPERAND (op, 1);
5399           else if (TREE_CODE (op) == PLUS_EXPR
5400                    && TREE_OPERAND (op, 1) == cand->var_before)
5401             op = TREE_OPERAND (op, 0);
5402           else
5403             op = NULL_TREE;
5404         }
5405       else
5406         op = NULL_TREE;
5407
5408       if (op
5409           && (TREE_CODE (op) == INTEGER_CST
5410               || operand_equal_p (op, step, 0)))
5411         return;
5412
5413       /* Otherwise, add the necessary computations to express
5414          the iv.  */
5415       op = fold_convert (ctype, cand->var_before);
5416       comp = fold_convert (utype,
5417                            build2 (incr_code, ctype, op,
5418                                    unshare_expr (step)));
5419     }
5420   else
5421     comp = get_computation (data->current_loop, use, cand);
5422
5423   switch (TREE_CODE (use->stmt))
5424     {
5425     case PHI_NODE:
5426       tgt = PHI_RESULT (use->stmt);
5427
5428       /* If we should keep the biv, do not replace it.  */
5429       if (name_info (data, tgt)->preserve_biv)
5430         return;
5431
5432       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5433       while (!bsi_end_p (pbsi)
5434              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5435         {
5436           bsi = pbsi;
5437           bsi_next (&pbsi);
5438         }
5439       break;
5440
5441     case MODIFY_EXPR:
5442       tgt = TREE_OPERAND (use->stmt, 0);
5443       bsi = bsi_for_stmt (use->stmt);
5444       break;
5445
5446     default:
5447       gcc_unreachable ();
5448     }
5449
5450   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5451
5452   if (TREE_CODE (use->stmt) == PHI_NODE)
5453     {
5454       if (stmts)
5455         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5456       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5457       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5458       remove_statement (use->stmt, false);
5459       SSA_NAME_DEF_STMT (tgt) = ass;
5460     }
5461   else
5462     {
5463       if (stmts)
5464         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5465       TREE_OPERAND (use->stmt, 1) = op;
5466     }
5467 }
5468
5469 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5470    for_each_index.  */
5471
5472 static bool
5473 idx_remove_ssa_names (tree base, tree *idx,
5474                       void *data ATTRIBUTE_UNUSED)
5475 {
5476   tree *op;
5477
5478   if (TREE_CODE (*idx) == SSA_NAME)
5479     *idx = SSA_NAME_VAR (*idx);
5480
5481   if (TREE_CODE (base) == ARRAY_REF)
5482     {
5483       op = &TREE_OPERAND (base, 2);
5484       if (*op
5485           && TREE_CODE (*op) == SSA_NAME)
5486         *op = SSA_NAME_VAR (*op);
5487       op = &TREE_OPERAND (base, 3);
5488       if (*op
5489           && TREE_CODE (*op) == SSA_NAME)
5490         *op = SSA_NAME_VAR (*op);
5491     }
5492
5493   return true;
5494 }
5495
5496 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5497
5498 static tree
5499 unshare_and_remove_ssa_names (tree ref)
5500 {
5501   ref = unshare_expr (ref);
5502   for_each_index (&ref, idx_remove_ssa_names, NULL);
5503
5504   return ref;
5505 }
5506
5507 /* Extract the alias analysis info for the memory reference REF.  There are
5508    several ways how this information may be stored and what precisely is
5509    its semantics depending on the type of the reference, but there always is
5510    somewhere hidden one _DECL node that is used to determine the set of
5511    virtual operands for the reference.  The code below deciphers this jungle
5512    and extracts this single useful piece of information.  */
5513
5514 static tree
5515 get_ref_tag (tree ref, tree orig)
5516 {
5517   tree var = get_base_address (ref);
5518   tree aref = NULL_TREE, tag, sv;
5519   HOST_WIDE_INT offset, size, maxsize;
5520
5521   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5522     {
5523       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5524       if (ref)
5525         break;
5526     }
5527
5528   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5529     return unshare_expr (sv);
5530
5531   if (!var)
5532     return NULL_TREE;
5533
5534   if (TREE_CODE (var) == INDIRECT_REF)
5535     {
5536       /* If the base is a dereference of a pointer, first check its name memory
5537          tag.  If it does not have one, use its symbol memory tag.  */
5538       var = TREE_OPERAND (var, 0);
5539       if (TREE_CODE (var) != SSA_NAME)
5540         return NULL_TREE;
5541
5542       if (SSA_NAME_PTR_INFO (var))
5543         {
5544           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5545           if (tag)
5546             return tag;
5547         }
5548
5549       var = SSA_NAME_VAR (var);
5550       tag = var_ann (var)->symbol_mem_tag;
5551       gcc_assert (tag != NULL_TREE);
5552       return tag;
5553     }
5554   else
5555     {
5556       if (!DECL_P (var))
5557         return NULL_TREE;
5558
5559       tag = var_ann (var)->symbol_mem_tag;
5560       if (tag)
5561         return tag;
5562
5563       return var;
5564     }
5565 }
5566
5567 /* Copies the reference information from OLD_REF to NEW_REF.  */
5568
5569 static void
5570 copy_ref_info (tree new_ref, tree old_ref)
5571 {
5572   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5573     copy_mem_ref_info (new_ref, old_ref);
5574   else
5575     {
5576       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5577       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5578     }
5579 }
5580
5581 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5582
5583 static void
5584 rewrite_use_address (struct ivopts_data *data,
5585                      struct iv_use *use, struct iv_cand *cand)
5586 {
5587   struct affine_tree_combination aff;
5588   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5589   tree ref;
5590
5591   get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5592   unshare_aff_combination (&aff);
5593
5594   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5595   copy_ref_info (ref, *use->op_p);
5596   *use->op_p = ref;
5597 }
5598
5599 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5600    candidate CAND.  */
5601
5602 static void
5603 rewrite_use_compare (struct ivopts_data *data,
5604                      struct iv_use *use, struct iv_cand *cand)
5605 {
5606   tree comp;
5607   tree *op_p, cond, op, stmts, bound;
5608   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5609   enum tree_code compare;
5610   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5611
5612   bound = cp->value;
5613   if (bound)
5614     {
5615       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5616       tree var_type = TREE_TYPE (var);
5617
5618       compare = iv_elimination_compare (data, use);
5619       bound = fold_convert (var_type, bound);
5620       op = force_gimple_operand (unshare_expr (bound), &stmts,
5621                                  true, NULL_TREE);
5622
5623       if (stmts)
5624         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5625
5626       *use->op_p = build2 (compare, boolean_type_node, var, op);
5627       update_stmt (use->stmt);
5628       return;
5629     }
5630
5631   /* The induction variable elimination failed; just express the original
5632      giv.  */
5633   comp = get_computation (data->current_loop, use, cand);
5634
5635   cond = *use->op_p;
5636   op_p = &TREE_OPERAND (cond, 0);
5637   if (TREE_CODE (*op_p) != SSA_NAME
5638       || zero_p (get_iv (data, *op_p)->step))
5639     op_p = &TREE_OPERAND (cond, 1);
5640
5641   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5642   if (stmts)
5643     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5644
5645   *op_p = op;
5646 }
5647
5648 /* Rewrites USE using candidate CAND.  */
5649
5650 static void
5651 rewrite_use (struct ivopts_data *data,
5652              struct iv_use *use, struct iv_cand *cand)
5653 {
5654   switch (use->type)
5655     {
5656       case USE_NONLINEAR_EXPR:
5657         rewrite_use_nonlinear_expr (data, use, cand);
5658         break;
5659
5660       case USE_ADDRESS:
5661         rewrite_use_address (data, use, cand);
5662         break;
5663
5664       case USE_COMPARE:
5665         rewrite_use_compare (data, use, cand);
5666         break;
5667
5668       default:
5669         gcc_unreachable ();
5670     }
5671   mark_new_vars_to_rename (use->stmt);
5672 }
5673
5674 /* Rewrite the uses using the selected induction variables.  */
5675
5676 static void
5677 rewrite_uses (struct ivopts_data *data)
5678 {
5679   unsigned i;
5680   struct iv_cand *cand;
5681   struct iv_use *use;
5682
5683   for (i = 0; i < n_iv_uses (data); i++)
5684     {
5685       use = iv_use (data, i);
5686       cand = use->selected;
5687       gcc_assert (cand);
5688
5689       rewrite_use (data, use, cand);
5690     }
5691 }
5692
5693 /* Removes the ivs that are not used after rewriting.  */
5694
5695 static void
5696 remove_unused_ivs (struct ivopts_data *data)
5697 {
5698   unsigned j;
5699   bitmap_iterator bi;
5700
5701   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5702     {
5703       struct version_info *info;
5704
5705       info = ver_info (data, j);
5706       if (info->iv
5707           && !zero_p (info->iv->step)
5708           && !info->inv_id
5709           && !info->iv->have_use_for
5710           && !info->preserve_biv)
5711         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5712     }
5713 }
5714
5715 /* Frees data allocated by the optimization of a single loop.  */
5716
5717 static void
5718 free_loop_data (struct ivopts_data *data)
5719 {
5720   unsigned i, j;
5721   bitmap_iterator bi;
5722   tree obj;
5723
5724   htab_empty (data->niters);
5725
5726   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5727     {
5728       struct version_info *info;
5729
5730       info = ver_info (data, i);
5731       if (info->iv)
5732         free (info->iv);
5733       info->iv = NULL;
5734       info->has_nonlin_use = false;
5735       info->preserve_biv = false;
5736       info->inv_id = 0;
5737     }
5738   bitmap_clear (data->relevant);
5739   bitmap_clear (data->important_candidates);
5740
5741   for (i = 0; i < n_iv_uses (data); i++)
5742     {
5743       struct iv_use *use = iv_use (data, i);
5744
5745       free (use->iv);
5746       BITMAP_FREE (use->related_cands);
5747       for (j = 0; j < use->n_map_members; j++)
5748         if (use->cost_map[j].depends_on)
5749           BITMAP_FREE (use->cost_map[j].depends_on);
5750       free (use->cost_map);
5751       free (use);
5752     }
5753   VEC_truncate (iv_use_p, data->iv_uses, 0);
5754
5755   for (i = 0; i < n_iv_cands (data); i++)
5756     {
5757       struct iv_cand *cand = iv_cand (data, i);
5758
5759       if (cand->iv)
5760         free (cand->iv);
5761       if (cand->depends_on)
5762         BITMAP_FREE (cand->depends_on);
5763       free (cand);
5764     }
5765   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5766
5767   if (data->version_info_size < num_ssa_names)
5768     {
5769       data->version_info_size = 2 * num_ssa_names;
5770       free (data->version_info);
5771       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5772     }
5773
5774   data->max_inv_id = 0;
5775
5776   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5777     SET_DECL_RTL (obj, NULL_RTX);
5778
5779   VEC_truncate (tree, decl_rtl_to_reset, 0);
5780 }
5781
5782 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5783    loop tree.  */
5784
5785 static void
5786 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5787 {
5788   free_loop_data (data);
5789   free (data->version_info);
5790   BITMAP_FREE (data->relevant);
5791   BITMAP_FREE (data->important_candidates);
5792   htab_delete (data->niters);
5793
5794   VEC_free (tree, heap, decl_rtl_to_reset);
5795   VEC_free (iv_use_p, heap, data->iv_uses);
5796   VEC_free (iv_cand_p, heap, data->iv_candidates);
5797 }
5798
5799 /* Optimizes the LOOP.  Returns true if anything changed.  */
5800
5801 static bool
5802 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5803 {
5804   bool changed = false;
5805   struct iv_ca *iv_ca;
5806   edge exit;
5807
5808   data->current_loop = loop;
5809
5810   if (dump_file && (dump_flags & TDF_DETAILS))
5811     {
5812       fprintf (dump_file, "Processing loop %d\n", loop->num);
5813
5814       exit = single_dom_exit (loop);
5815       if (exit)
5816         {
5817           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5818                    exit->src->index, exit->dest->index);
5819           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5820           fprintf (dump_file, "\n");
5821         }
5822
5823       fprintf (dump_file, "\n");
5824     }
5825
5826   /* For each ssa name determines whether it behaves as an induction variable
5827      in some loop.  */
5828   if (!find_induction_variables (data))
5829     goto finish;
5830
5831   /* Finds interesting uses (item 1).  */
5832   find_interesting_uses (data);
5833   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5834     goto finish;
5835
5836   /* Finds candidates for the induction variables (item 2).  */
5837   find_iv_candidates (data);
5838
5839   /* Calculates the costs (item 3, part 1).  */
5840   determine_use_iv_costs (data);
5841   determine_iv_costs (data);
5842   determine_set_costs (data);
5843
5844   /* Find the optimal set of induction variables (item 3, part 2).  */
5845   iv_ca = find_optimal_iv_set (data);
5846   if (!iv_ca)
5847     goto finish;
5848   changed = true;
5849
5850   /* Create the new induction variables (item 4, part 1).  */
5851   create_new_ivs (data, iv_ca);
5852   iv_ca_free (&iv_ca);
5853
5854   /* Rewrite the uses (item 4, part 2).  */
5855   rewrite_uses (data);
5856
5857   /* Remove the ivs that are unused after rewriting.  */
5858   remove_unused_ivs (data);
5859
5860   /* We have changed the structure of induction variables; it might happen
5861      that definitions in the scev database refer to some of them that were
5862      eliminated.  */
5863   scev_reset ();
5864
5865 finish:
5866   free_loop_data (data);
5867
5868   return changed;
5869 }
5870
5871 /* Main entry point.  Optimizes induction variables in LOOPS.  */
5872
5873 void
5874 tree_ssa_iv_optimize (struct loops *loops)
5875 {
5876   struct loop *loop;
5877   struct ivopts_data data;
5878
5879   tree_ssa_iv_optimize_init (&data);
5880
5881   /* Optimize the loops starting with the innermost ones.  */
5882   loop = loops->tree_root;
5883   while (loop->inner)
5884     loop = loop->inner;
5885
5886   /* Scan the loops, inner ones first.  */
5887   while (loop != loops->tree_root)
5888     {
5889       if (dump_file && (dump_flags & TDF_DETAILS))
5890         flow_loop_dump (loop, dump_file, NULL, 1);
5891
5892       tree_ssa_iv_optimize_loop (&data, loop);
5893
5894       if (loop->next)
5895         {
5896           loop = loop->next;
5897           while (loop->inner)
5898             loop = loop->inner;
5899         }
5900       else
5901         loop = loop->outer;
5902     }
5903
5904   tree_ssa_iv_optimize_finalize (&data);
5905 }