gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  19 02111-1307, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92
  93 /* The infinite cost.  */
  94 #define INFTY 10000000
  95
  96 /* The expected number of loop iterations.  TODO -- use profiling instead of
  97    this.  */
  98 #define AVG_LOOP_NITER(LOOP) 5
  99
 100
 101 /* Representation of the induction variable.  */
 102 struct iv
 103 {
 104   tree base;            /* Initial value of the iv.  */
 105   tree base_object;     /* A memory object to that the induction variable points.  */
 106   tree step;            /* Step of the iv (constant only).  */
 107   tree ssa_name;        /* The ssa name with the value.  */
 108   bool biv_p;           /* Is it a biv?  */
 109   bool have_use_for;    /* Do we already have a use for it?  */
 110   unsigned use_id;      /* The identifier in the use if it is the case.  */
 111 };
 112
 113 /* Per-ssa version information (induction variable descriptions, etc.).  */
 114 struct version_info
 115 {
 116   tree name;            /* The ssa name.  */
 117   struct iv *iv;        /* Induction variable description.  */
 118   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 119                            an expression that is not an induction variable.  */
 120   unsigned inv_id;      /* Id of an invariant.  */
 121   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 122 };
 123
 124 /* Information attached to loop.  */
 125 struct loop_data
 126 {
 127   unsigned regs_used;   /* Number of registers used.  */
 128 };
 129
 130 /* Types of uses.  */
 131 enum use_type
 132 {
 133   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 134   USE_OUTER,            /* The induction variable is used outside the loop.  */
 135   USE_ADDRESS,          /* Use in an address.  */
 136   USE_COMPARE           /* Use is a compare.  */
 137 };
 138
 139 /* The candidate - cost pair.  */
 140 struct cost_pair
 141 {
 142   struct iv_cand *cand; /* The candidate.  */
 143   unsigned cost;        /* The cost.  */
 144   bitmap depends_on;    /* The list of invariants that have to be
 145                            preserved.  */
 146   tree value;           /* For final value elimination, the expression for
 147                            the final value of the iv.  For iv elimination,
 148                            the new bound to compare with.  */
 149 };
 150
 151 /* Use.  */
 152 struct iv_use
 153 {
 154   unsigned id;          /* The id of the use.  */
 155   enum use_type type;   /* Type of the use.  */
 156   struct iv *iv;        /* The induction variable it is based on.  */
 157   tree stmt;            /* Statement in that it occurs.  */
 158   tree *op_p;           /* The place where it occurs.  */
 159   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 160                            important ones.  */
 161
 162   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 163   struct cost_pair *cost_map;
 164                         /* The costs wrto the iv candidates.  */
 165
 166   struct iv_cand *selected;
 167                         /* The selected candidate.  */
 168 };
 169
 170 /* The position where the iv is computed.  */
 171 enum iv_position
 172 {
 173   IP_NORMAL,            /* At the end, just before the exit condition.  */
 174   IP_END,               /* At the end of the latch block.  */
 175   IP_ORIGINAL           /* The original biv.  */
 176 };
 177
 178 /* The induction variable candidate.  */
 179 struct iv_cand
 180 {
 181   unsigned id;          /* The number of the candidate.  */
 182   bool important;       /* Whether this is an "important" candidate, i.e. such
 183                            that it should be considered by all uses.  */
 184   enum iv_position pos; /* Where it is computed.  */
 185   tree incremented_at;  /* For original biv, the statement where it is
 186                            incremented.  */
 187   tree var_before;      /* The variable used for it before increment.  */
 188   tree var_after;       /* The variable used for it after increment.  */
 189   struct iv *iv;        /* The value of the candidate.  NULL for
 190                            "pseudocandidate" used to indicate the possibility
 191                            to replace the final value of an iv by direct
 192                            computation of the value.  */
 193   unsigned cost;        /* Cost of the candidate.  */
 194   bitmap depends_on;    /* The list of invariants that are used in step of the
 195                            biv.  */
 196 };
 197
 198 /* The data used by the induction variable optimizations.  */
 199
 200 typedef struct iv_use *iv_use_p;
 201 DEF_VEC_P(iv_use_p);
 202 DEF_VEC_ALLOC_P(iv_use_p,heap);
 203
 204 typedef struct iv_cand *iv_cand_p;
 205 DEF_VEC_P(iv_cand_p);
 206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 207
 208 struct ivopts_data
 209 {
 210   /* The currently optimized loop.  */
 211   struct loop *current_loop;
 212
 213   /* Numbers of iterations for all exits of the current loop.  */
 214   htab_t niters;
 215
 216   /* The size of version_info array allocated.  */
 217   unsigned version_info_size;
 218
 219   /* The array of information for the ssa names.  */
 220   struct version_info *version_info;
 221
 222   /* The bitmap of indices in version_info whose value was changed.  */
 223   bitmap relevant;
 224
 225   /* The maximum invariant id.  */
 226   unsigned max_inv_id;
 227
 228   /* The uses of induction variables.  */
 229   VEC(iv_use_p,heap) *iv_uses;
 230
 231   /* The candidates.  */
 232   VEC(iv_cand_p,heap) *iv_candidates;
 233
 234   /* A bitmap of important candidates.  */
 235   bitmap important_candidates;
 236
 237   /* Whether to consider just related and important candidates when replacing a
 238      use.  */
 239   bool consider_all_candidates;
 240 };
 241
 242 /* An assignment of iv candidates to uses.  */
 243
 244 struct iv_ca
 245 {
 246   /* The number of uses covered by the assignment.  */
 247   unsigned upto;
 248
 249   /* Number of uses that cannot be expressed by the candidates in the set.  */
 250   unsigned bad_uses;
 251
 252   /* Candidate assigned to a use, together with the related costs.  */
 253   struct cost_pair **cand_for_use;
 254
 255   /* Number of times each candidate is used.  */
 256   unsigned *n_cand_uses;
 257
 258   /* The candidates used.  */
 259   bitmap cands;
 260
 261   /* The number of candidates in the set.  */
 262   unsigned n_cands;
 263
 264   /* Total number of registers needed.  */
 265   unsigned n_regs;
 266
 267   /* Total cost of expressing uses.  */
 268   unsigned cand_use_cost;
 269
 270   /* Total cost of candidates.  */
 271   unsigned cand_cost;
 272
 273   /* Number of times each invariant is used.  */
 274   unsigned *n_invariant_uses;
 275
 276   /* Total cost of the assignment.  */
 277   unsigned cost;
 278 };
 279
 280 /* Difference of two iv candidate assignments.  */
 281
 282 struct iv_ca_delta
 283 {
 284   /* Changed use.  */
 285   struct iv_use *use;
 286
 287   /* An old assignment (for rollback purposes).  */
 288   struct cost_pair *old_cp;
 289
 290   /* A new assignment.  */
 291   struct cost_pair *new_cp;
 292
 293   /* Next change in the list.  */
 294   struct iv_ca_delta *next_change;
 295 };
 296
 297 /* Bound on number of candidates below that all candidates are considered.  */
 298
 299 #define CONSIDER_ALL_CANDIDATES_BOUND \
 300   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 301
 302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 303    optimizing such a loop would help, and it would take ages).  */
 304
 305 #define MAX_CONSIDERED_USES \
 306   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 307
 308 /* If there are at most this number of ivs in the set, try removing unnecessary
 309    ivs from the set always.  */
 310
 311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 312   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 313
 314 /* The list of trees for that the decl_rtl field must be reset is stored
 315    here.  */
 316
 317 static VEC(tree,heap) *decl_rtl_to_reset;
 318
 319 /* Number of uses recorded in DATA.  */
 320
 321 static inline unsigned
 322 n_iv_uses (struct ivopts_data *data)
 323 {
 324   return VEC_length (iv_use_p, data->iv_uses);
 325 }
 326
 327 /* Ith use recorded in DATA.  */
 328
 329 static inline struct iv_use *
 330 iv_use (struct ivopts_data *data, unsigned i)
 331 {
 332   return VEC_index (iv_use_p, data->iv_uses, i);
 333 }
 334
 335 /* Number of candidates recorded in DATA.  */
 336
 337 static inline unsigned
 338 n_iv_cands (struct ivopts_data *data)
 339 {
 340   return VEC_length (iv_cand_p, data->iv_candidates);
 341 }
 342
 343 /* Ith candidate recorded in DATA.  */
 344
 345 static inline struct iv_cand *
 346 iv_cand (struct ivopts_data *data, unsigned i)
 347 {
 348   return VEC_index (iv_cand_p, data->iv_candidates, i);
 349 }
 350
 351 /* The data for LOOP.  */
 352
 353 static inline struct loop_data *
 354 loop_data (struct loop *loop)
 355 {
 356   return loop->aux;
 357 }
 358
 359 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 360
 361 static edge
 362 single_dom_exit (struct loop *loop)
 363 {
 364   edge exit = loop->single_exit;
 365
 366   if (!exit)
 367     return NULL;
 368
 369   if (!just_once_each_iteration_p (loop, exit->src))
 370     return NULL;
 371
 372   return exit;
 373 }
 374
 375 /* Dumps information about the induction variable IV to FILE.  */
 376
 377 extern void dump_iv (FILE *, struct iv *);
 378 void
 379 dump_iv (FILE *file, struct iv *iv)
 380 {
 381   if (iv->ssa_name)
 382     {
 383       fprintf (file, "ssa name ");
 384       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 385       fprintf (file, "\n");
 386     }
 387
 388   fprintf (file, "  type ");
 389   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 390   fprintf (file, "\n");
 391
 392   if (iv->step)
 393     {
 394       fprintf (file, "  base ");
 395       print_generic_expr (file, iv->base, TDF_SLIM);
 396       fprintf (file, "\n");
 397
 398       fprintf (file, "  step ");
 399       print_generic_expr (file, iv->step, TDF_SLIM);
 400       fprintf (file, "\n");
 401     }
 402   else
 403     {
 404       fprintf (file, "  invariant ");
 405       print_generic_expr (file, iv->base, TDF_SLIM);
 406       fprintf (file, "\n");
 407     }
 408
 409   if (iv->base_object)
 410     {
 411       fprintf (file, "  base object ");
 412       print_generic_expr (file, iv->base_object, TDF_SLIM);
 413       fprintf (file, "\n");
 414     }
 415
 416   if (iv->biv_p)
 417     fprintf (file, "  is a biv\n");
 418 }
 419
 420 /* Dumps information about the USE to FILE.  */
 421
 422 extern void dump_use (FILE *, struct iv_use *);
 423 void
 424 dump_use (FILE *file, struct iv_use *use)
 425 {
 426   fprintf (file, "use %d\n", use->id);
 427
 428   switch (use->type)
 429     {
 430     case USE_NONLINEAR_EXPR:
 431       fprintf (file, "  generic\n");
 432       break;
 433
 434     case USE_OUTER:
 435       fprintf (file, "  outside\n");
 436       break;
 437
 438     case USE_ADDRESS:
 439       fprintf (file, "  address\n");
 440       break;
 441
 442     case USE_COMPARE:
 443       fprintf (file, "  compare\n");
 444       break;
 445
 446     default:
 447       gcc_unreachable ();
 448     }
 449
 450   fprintf (file, "  in statement ");
 451   print_generic_expr (file, use->stmt, TDF_SLIM);
 452   fprintf (file, "\n");
 453
 454   fprintf (file, "  at position ");
 455   if (use->op_p)
 456     print_generic_expr (file, *use->op_p, TDF_SLIM);
 457   fprintf (file, "\n");
 458
 459   dump_iv (file, use->iv);
 460
 461   if (use->related_cands)
 462     {
 463       fprintf (file, "  related candidates ");
 464       dump_bitmap (file, use->related_cands);
 465     }
 466 }
 467
 468 /* Dumps information about the uses to FILE.  */
 469
 470 extern void dump_uses (FILE *, struct ivopts_data *);
 471 void
 472 dump_uses (FILE *file, struct ivopts_data *data)
 473 {
 474   unsigned i;
 475   struct iv_use *use;
 476
 477   for (i = 0; i < n_iv_uses (data); i++)
 478     {
 479       use = iv_use (data, i);
 480
 481       dump_use (file, use);
 482       fprintf (file, "\n");
 483     }
 484 }
 485
 486 /* Dumps information about induction variable candidate CAND to FILE.  */
 487
 488 extern void dump_cand (FILE *, struct iv_cand *);
 489 void
 490 dump_cand (FILE *file, struct iv_cand *cand)
 491 {
 492   struct iv *iv = cand->iv;
 493
 494   fprintf (file, "candidate %d%s\n",
 495            cand->id, cand->important ? " (important)" : "");
 496
 497   if (cand->depends_on)
 498     {
 499       fprintf (file, "  depends on ");
 500       dump_bitmap (file, cand->depends_on);
 501     }
 502
 503   if (!iv)
 504     {
 505       fprintf (file, "  final value replacement\n");
 506       return;
 507     }
 508
 509   switch (cand->pos)
 510     {
 511     case IP_NORMAL:
 512       fprintf (file, "  incremented before exit test\n");
 513       break;
 514
 515     case IP_END:
 516       fprintf (file, "  incremented at end\n");
 517       break;
 518
 519     case IP_ORIGINAL:
 520       fprintf (file, "  original biv\n");
 521       break;
 522     }
 523
 524   dump_iv (file, iv);
 525 }
 526
 527 /* Returns the info for ssa version VER.  */
 528
 529 static inline struct version_info *
 530 ver_info (struct ivopts_data *data, unsigned ver)
 531 {
 532   return data->version_info + ver;
 533 }
 534
 535 /* Returns the info for ssa name NAME.  */
 536
 537 static inline struct version_info *
 538 name_info (struct ivopts_data *data, tree name)
 539 {
 540   return ver_info (data, SSA_NAME_VERSION (name));
 541 }
 542
 543 /* Checks whether there exists number X such that X * B = A, counting modulo
 544    2^BITS.  */
 545
 546 static bool
 547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
 548         HOST_WIDE_INT *x)
 549 {
 550   unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
 551   unsigned HOST_WIDE_INT inv, ex, val;
 552   unsigned i;
 553
 554   a &= mask;
 555   b &= mask;
 556
 557   /* First divide the whole equation by 2 as long as possible.  */
 558   while (!(a & 1) && !(b & 1))
 559     {
 560       a >>= 1;
 561       b >>= 1;
 562       bits--;
 563       mask >>= 1;
 564     }
 565
 566   if (!(b & 1))
 567     {
 568       /* If b is still even, a is odd and there is no such x.  */
 569       return false;
 570     }
 571
 572   /* Find the inverse of b.  We compute it as
 573      b^(2^(bits - 1) - 1) (mod 2^bits).  */
 574   inv = 1;
 575   ex = b;
 576   for (i = 0; i < bits - 1; i++)
 577     {
 578       inv = (inv * ex) & mask;
 579       ex = (ex * ex) & mask;
 580     }
 581
 582   val = (a * inv) & mask;
 583
 584   gcc_assert (((val * b) & mask) == a);
 585
 586   if ((val >> (bits - 1)) & 1)
 587     val |= ~mask;
 588
 589   *x = val;
 590
 591   return true;
 592 }
 593
 594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 595    emitted in LOOP.  */
 596
 597 static bool
 598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 599 {
 600   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 601
 602   gcc_assert (bb);
 603
 604   if (sbb == loop->latch)
 605     return true;
 606
 607   if (sbb != bb)
 608     return false;
 609
 610   return stmt == last_stmt (bb);
 611 }
 612
 613 /* Returns true if STMT if after the place where the original induction
 614    variable CAND is incremented.  */
 615
 616 static bool
 617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 618 {
 619   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 620   basic_block stmt_bb = bb_for_stmt (stmt);
 621   block_stmt_iterator bsi;
 622
 623   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 624     return false;
 625
 626   if (stmt_bb != cand_bb)
 627     return true;
 628
 629   /* Scan the block from the end, since the original ivs are usually
 630      incremented at the end of the loop body.  */
 631   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 632     {
 633       if (bsi_stmt (bsi) == cand->incremented_at)
 634         return false;
 635       if (bsi_stmt (bsi) == stmt)
 636         return true;
 637     }
 638 }
 639
 640 /* Returns true if STMT if after the place where the induction variable
 641    CAND is incremented in LOOP.  */
 642
 643 static bool
 644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 645 {
 646   switch (cand->pos)
 647     {
 648     case IP_END:
 649       return false;
 650
 651     case IP_NORMAL:
 652       return stmt_after_ip_normal_pos (loop, stmt);
 653
 654     case IP_ORIGINAL:
 655       return stmt_after_ip_original_pos (cand, stmt);
 656
 657     default:
 658       gcc_unreachable ();
 659     }
 660 }
 661
 662 /* Element of the table in that we cache the numbers of iterations obtained
 663    from exits of the loop.  */
 664
 665 struct nfe_cache_elt
 666 {
 667   /* The edge for that the number of iterations is cached.  */
 668   edge exit;
 669
 670   /* True if the # of iterations was successfully determined.  */
 671   bool valid_p;
 672
 673   /* Description of # of iterations.  */
 674   struct tree_niter_desc niter;
 675 };
 676
 677 /* Hash function for nfe_cache_elt E.  */
 678
 679 static hashval_t
 680 nfe_hash (const void *e)
 681 {
 682   const struct nfe_cache_elt *elt = e;
 683
 684   return htab_hash_pointer (elt->exit);
 685 }
 686
 687 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 688
 689 static int
 690 nfe_eq (const void *e1, const void *e2)
 691 {
 692   const struct nfe_cache_elt *elt1 = e1;
 693
 694   return elt1->exit == e2;
 695 }
 696
 697 /*  Returns structure describing number of iterations determined from
 698     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 699
 700 static struct tree_niter_desc *
 701 niter_for_exit (struct ivopts_data *data, edge exit)
 702 {
 703   struct nfe_cache_elt *nfe_desc;
 704   PTR *slot;
 705
 706   slot = htab_find_slot_with_hash (data->niters, exit,
 707                                    htab_hash_pointer (exit),
 708                                    INSERT);
 709
 710   if (!*slot)
 711     {
 712       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 713       nfe_desc->exit = exit;
 714       nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
 715                                                      exit, &nfe_desc->niter);
 716       *slot = nfe_desc;
 717     }
 718   else
 719     nfe_desc = *slot;
 720
 721   if (!nfe_desc->valid_p)
 722     return NULL;
 723
 724   return &nfe_desc->niter;
 725 }
 726
 727 /* Returns structure describing number of iterations determined from
 728    single dominating exit of DATA->current_loop, or NULL if something
 729    goes wrong.  */
 730
 731 static struct tree_niter_desc *
 732 niter_for_single_dom_exit (struct ivopts_data *data)
 733 {
 734   edge exit = single_dom_exit (data->current_loop);
 735
 736   if (!exit)
 737     return NULL;
 738
 739   return niter_for_exit (data, exit);
 740 }
 741
 742 /* Initializes data structures used by the iv optimization pass, stored
 743    in DATA.  LOOPS is the loop tree.  */
 744
 745 static void
 746 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
 747 {
 748   unsigned i;
 749
 750   data->version_info_size = 2 * num_ssa_names;
 751   data->version_info = xcalloc (data->version_info_size,
 752                                 sizeof (struct version_info));
 753   data->relevant = BITMAP_ALLOC (NULL);
 754   data->important_candidates = BITMAP_ALLOC (NULL);
 755   data->max_inv_id = 0;
 756   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 757
 758   for (i = 1; i < loops->num; i++)
 759     if (loops->parray[i])
 760       loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
 761
 762   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 763   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 764   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 765 }
 766
 767 /* Returns a memory object to that EXPR points.  In case we are able to
 768    determine that it does not point to any such object, NULL is returned.  */
 769
 770 static tree
 771 determine_base_object (tree expr)
 772 {
 773   enum tree_code code = TREE_CODE (expr);
 774   tree base, obj, op0, op1;
 775
 776   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 777     return NULL_TREE;
 778
 779   switch (code)
 780     {
 781     case INTEGER_CST:
 782       return NULL_TREE;
 783
 784     case ADDR_EXPR:
 785       obj = TREE_OPERAND (expr, 0);
 786       base = get_base_address (obj);
 787
 788       if (!base)
 789         return expr;
 790
 791       if (TREE_CODE (base) == INDIRECT_REF)
 792         return determine_base_object (TREE_OPERAND (base, 0));
 793
 794       return fold_convert (ptr_type_node,
 795                            build_fold_addr_expr (base));
 796
 797     case PLUS_EXPR:
 798     case MINUS_EXPR:
 799       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 800       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 801
 802       if (!op1)
 803         return op0;
 804
 805       if (!op0)
 806         return (code == PLUS_EXPR
 807                 ? op1
 808                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 809
 810       return fold_build2 (code, ptr_type_node, op0, op1);
 811
 812     case NOP_EXPR:
 813     case CONVERT_EXPR:
 814       return determine_base_object (TREE_OPERAND (expr, 0));
 815
 816     default:
 817       return fold_convert (ptr_type_node, expr);
 818     }
 819 }
 820
 821 /* Allocates an induction variable with given initial value BASE and step STEP
 822    for loop LOOP.  */
 823
 824 static struct iv *
 825 alloc_iv (tree base, tree step)
 826 {
 827   struct iv *iv = xcalloc (1, sizeof (struct iv));
 828
 829   if (step && integer_zerop (step))
 830     step = NULL_TREE;
 831
 832   iv->base = base;
 833   iv->base_object = determine_base_object (base);
 834   iv->step = step;
 835   iv->biv_p = false;
 836   iv->have_use_for = false;
 837   iv->use_id = 0;
 838   iv->ssa_name = NULL_TREE;
 839
 840   return iv;
 841 }
 842
 843 /* Sets STEP and BASE for induction variable IV.  */
 844
 845 static void
 846 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 847 {
 848   struct version_info *info = name_info (data, iv);
 849
 850   gcc_assert (!info->iv);
 851
 852   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 853   info->iv = alloc_iv (base, step);
 854   info->iv->ssa_name = iv;
 855 }
 856
 857 /* Finds induction variable declaration for VAR.  */
 858
 859 static struct iv *
 860 get_iv (struct ivopts_data *data, tree var)
 861 {
 862   basic_block bb;
 863
 864   if (!name_info (data, var)->iv)
 865     {
 866       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 867
 868       if (!bb
 869           || !flow_bb_inside_loop_p (data->current_loop, bb))
 870         set_iv (data, var, var, NULL_TREE);
 871     }
 872
 873   return name_info (data, var)->iv;
 874 }
 875
 876 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 877    not define a simple affine biv with nonzero step.  */
 878
 879 static tree
 880 determine_biv_step (tree phi)
 881 {
 882   struct loop *loop = bb_for_stmt (phi)->loop_father;
 883   tree name = PHI_RESULT (phi), base, step;
 884
 885   if (!is_gimple_reg (name))
 886     return NULL_TREE;
 887
 888   if (!simple_iv (loop, phi, name, &base, &step, true))
 889     return NULL_TREE;
 890
 891   if (zero_p (step))
 892     return NULL_TREE;
 893
 894   return step;
 895 }
 896
 897 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 898
 899 static bool
 900 abnormal_ssa_name_p (tree exp)
 901 {
 902   if (!exp)
 903     return false;
 904
 905   if (TREE_CODE (exp) != SSA_NAME)
 906     return false;
 907
 908   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 909 }
 910
 911 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 912    abnormal phi node.  Callback for for_each_index.  */
 913
 914 static bool
 915 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 916                                   void *data ATTRIBUTE_UNUSED)
 917 {
 918   if (TREE_CODE (base) == ARRAY_REF)
 919     {
 920       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 921         return false;
 922       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 923         return false;
 924     }
 925
 926   return !abnormal_ssa_name_p (*index);
 927 }
 928
 929 /* Returns true if EXPR contains a ssa name that occurs in an
 930    abnormal phi node.  */
 931
 932 static bool
 933 contains_abnormal_ssa_name_p (tree expr)
 934 {
 935   enum tree_code code;
 936   enum tree_code_class class;
 937
 938   if (!expr)
 939     return false;
 940
 941   code = TREE_CODE (expr);
 942   class = TREE_CODE_CLASS (code);
 943
 944   if (code == SSA_NAME)
 945     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 946
 947   if (code == INTEGER_CST
 948       || is_gimple_min_invariant (expr))
 949     return false;
 950
 951   if (code == ADDR_EXPR)
 952     return !for_each_index (&TREE_OPERAND (expr, 0),
 953                             idx_contains_abnormal_ssa_name_p,
 954                             NULL);
 955
 956   switch (class)
 957     {
 958     case tcc_binary:
 959     case tcc_comparison:
 960       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 961         return true;
 962
 963       /* Fallthru.  */
 964     case tcc_unary:
 965       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 966         return true;
 967
 968       break;
 969
 970     default:
 971       gcc_unreachable ();
 972     }
 973
 974   return false;
 975 }
 976
 977 /* Finds basic ivs.  */
 978
 979 static bool
 980 find_bivs (struct ivopts_data *data)
 981 {
 982   tree phi, step, type, base;
 983   bool found = false;
 984   struct loop *loop = data->current_loop;
 985
 986   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 987     {
 988       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 989         continue;
 990
 991       step = determine_biv_step (phi);
 992       if (!step)
 993         continue;
 994
 995       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 996       base = expand_simple_operations (base);
 997       if (contains_abnormal_ssa_name_p (base)
 998           || contains_abnormal_ssa_name_p (step))
 999         continue;
1000
1001       type = TREE_TYPE (PHI_RESULT (phi));
1002       base = fold_convert (type, base);
1003       if (step)
1004         step = fold_convert (type, step);
1005
1006       set_iv (data, PHI_RESULT (phi), base, step);
1007       found = true;
1008     }
1009
1010   return found;
1011 }
1012
1013 /* Marks basic ivs.  */
1014
1015 static void
1016 mark_bivs (struct ivopts_data *data)
1017 {
1018   tree phi, var;
1019   struct iv *iv, *incr_iv;
1020   struct loop *loop = data->current_loop;
1021   basic_block incr_bb;
1022
1023   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1024     {
1025       iv = get_iv (data, PHI_RESULT (phi));
1026       if (!iv)
1027         continue;
1028
1029       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1030       incr_iv = get_iv (data, var);
1031       if (!incr_iv)
1032         continue;
1033
1034       /* If the increment is in the subloop, ignore it.  */
1035       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1036       if (incr_bb->loop_father != data->current_loop
1037           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1038         continue;
1039
1040       iv->biv_p = true;
1041       incr_iv->biv_p = true;
1042     }
1043 }
1044
1045 /* Checks whether STMT defines a linear induction variable and stores its
1046    parameters to BASE and STEP.  */
1047
1048 static bool
1049 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1050                         tree *base, tree *step)
1051 {
1052   tree lhs;
1053   struct loop *loop = data->current_loop;
1054
1055   *base = NULL_TREE;
1056   *step = NULL_TREE;
1057
1058   if (TREE_CODE (stmt) != MODIFY_EXPR)
1059     return false;
1060
1061   lhs = TREE_OPERAND (stmt, 0);
1062   if (TREE_CODE (lhs) != SSA_NAME)
1063     return false;
1064
1065   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
1066     return false;
1067   *base = expand_simple_operations (*base);
1068
1069   if (contains_abnormal_ssa_name_p (*base)
1070       || contains_abnormal_ssa_name_p (*step))
1071     return false;
1072
1073   return true;
1074 }
1075
1076 /* Finds general ivs in statement STMT.  */
1077
1078 static void
1079 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1080 {
1081   tree base, step;
1082
1083   if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1084     return;
1085
1086   set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1087 }
1088
1089 /* Finds general ivs in basic block BB.  */
1090
1091 static void
1092 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1093 {
1094   block_stmt_iterator bsi;
1095
1096   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1097     find_givs_in_stmt (data, bsi_stmt (bsi));
1098 }
1099
1100 /* Finds general ivs.  */
1101
1102 static void
1103 find_givs (struct ivopts_data *data)
1104 {
1105   struct loop *loop = data->current_loop;
1106   basic_block *body = get_loop_body_in_dom_order (loop);
1107   unsigned i;
1108
1109   for (i = 0; i < loop->num_nodes; i++)
1110     find_givs_in_bb (data, body[i]);
1111   free (body);
1112 }
1113
1114 /* For each ssa name defined in LOOP determines whether it is an induction
1115    variable and if so, its initial value and step.  */
1116
1117 static bool
1118 find_induction_variables (struct ivopts_data *data)
1119 {
1120   unsigned i;
1121   bitmap_iterator bi;
1122
1123   if (!find_bivs (data))
1124     return false;
1125
1126   find_givs (data);
1127   mark_bivs (data);
1128
1129   if (dump_file && (dump_flags & TDF_DETAILS))
1130     {
1131       struct tree_niter_desc *niter;
1132
1133       niter = niter_for_single_dom_exit (data);
1134
1135       if (niter)
1136         {
1137           fprintf (dump_file, "  number of iterations ");
1138           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1139           fprintf (dump_file, "\n");
1140
1141           fprintf (dump_file, "  may be zero if ");
1142           print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1143           fprintf (dump_file, "\n");
1144           fprintf (dump_file, "\n");
1145         };
1146
1147       fprintf (dump_file, "Induction variables:\n\n");
1148
1149       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1150         {
1151           if (ver_info (data, i)->iv)
1152             dump_iv (dump_file, ver_info (data, i)->iv);
1153         }
1154     }
1155
1156   return true;
1157 }
1158
1159 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1160
1161 static struct iv_use *
1162 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1163             tree stmt, enum use_type use_type)
1164 {
1165   struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1166
1167   use->id = n_iv_uses (data);
1168   use->type = use_type;
1169   use->iv = iv;
1170   use->stmt = stmt;
1171   use->op_p = use_p;
1172   use->related_cands = BITMAP_ALLOC (NULL);
1173
1174   /* To avoid showing ssa name in the dumps, if it was not reset by the
1175      caller.  */
1176   iv->ssa_name = NULL_TREE;
1177
1178   if (dump_file && (dump_flags & TDF_DETAILS))
1179     dump_use (dump_file, use);
1180
1181   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1182
1183   return use;
1184 }
1185
1186 /* Checks whether OP is a loop-level invariant and if so, records it.
1187    NONLINEAR_USE is true if the invariant is used in a way we do not
1188    handle specially.  */
1189
1190 static void
1191 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1192 {
1193   basic_block bb;
1194   struct version_info *info;
1195
1196   if (TREE_CODE (op) != SSA_NAME
1197       || !is_gimple_reg (op))
1198     return;
1199
1200   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1201   if (bb
1202       && flow_bb_inside_loop_p (data->current_loop, bb))
1203     return;
1204
1205   info = name_info (data, op);
1206   info->name = op;
1207   info->has_nonlin_use |= nonlinear_use;
1208   if (!info->inv_id)
1209     info->inv_id = ++data->max_inv_id;
1210   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1211 }
1212
1213 /* Checks whether the use OP is interesting and if so, records it
1214    as TYPE.  */
1215
1216 static struct iv_use *
1217 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1218                                        enum use_type type)
1219 {
1220   struct iv *iv;
1221   struct iv *civ;
1222   tree stmt;
1223   struct iv_use *use;
1224
1225   if (TREE_CODE (op) != SSA_NAME)
1226     return NULL;
1227
1228   iv = get_iv (data, op);
1229   if (!iv)
1230     return NULL;
1231
1232   if (iv->have_use_for)
1233     {
1234       use = iv_use (data, iv->use_id);
1235
1236       gcc_assert (use->type == USE_NONLINEAR_EXPR
1237                   || use->type == USE_OUTER);
1238
1239       if (type == USE_NONLINEAR_EXPR)
1240         use->type = USE_NONLINEAR_EXPR;
1241       return use;
1242     }
1243
1244   if (zero_p (iv->step))
1245     {
1246       record_invariant (data, op, true);
1247       return NULL;
1248     }
1249   iv->have_use_for = true;
1250
1251   civ = xmalloc (sizeof (struct iv));
1252   *civ = *iv;
1253
1254   stmt = SSA_NAME_DEF_STMT (op);
1255   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1256               || TREE_CODE (stmt) == MODIFY_EXPR);
1257
1258   use = record_use (data, NULL, civ, stmt, type);
1259   iv->use_id = use->id;
1260
1261   return use;
1262 }
1263
1264 /* Checks whether the use OP is interesting and if so, records it.  */
1265
1266 static struct iv_use *
1267 find_interesting_uses_op (struct ivopts_data *data, tree op)
1268 {
1269   return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1270 }
1271
1272 /* Records a definition of induction variable OP that is used outside of the
1273    loop.  */
1274
1275 static struct iv_use *
1276 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1277 {
1278   return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1279 }
1280
1281 /* Checks whether the condition *COND_P in STMT is interesting
1282    and if so, records it.  */
1283
1284 static void
1285 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1286 {
1287   tree *op0_p;
1288   tree *op1_p;
1289   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1290   struct iv const_iv;
1291   tree zero = integer_zero_node;
1292
1293   const_iv.step = NULL_TREE;
1294
1295   if (TREE_CODE (*cond_p) != SSA_NAME
1296       && !COMPARISON_CLASS_P (*cond_p))
1297     return;
1298
1299   if (TREE_CODE (*cond_p) == SSA_NAME)
1300     {
1301       op0_p = cond_p;
1302       op1_p = &zero;
1303     }
1304   else
1305     {
1306       op0_p = &TREE_OPERAND (*cond_p, 0);
1307       op1_p = &TREE_OPERAND (*cond_p, 1);
1308     }
1309
1310   if (TREE_CODE (*op0_p) == SSA_NAME)
1311     iv0 = get_iv (data, *op0_p);
1312   else
1313     iv0 = &const_iv;
1314
1315   if (TREE_CODE (*op1_p) == SSA_NAME)
1316     iv1 = get_iv (data, *op1_p);
1317   else
1318     iv1 = &const_iv;
1319
1320   if (/* When comparing with non-invariant value, we may not do any senseful
1321          induction variable elimination.  */
1322       (!iv0 || !iv1)
1323       /* Eliminating condition based on two ivs would be nontrivial.
1324          ??? TODO -- it is not really important to handle this case.  */
1325       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1326     {
1327       find_interesting_uses_op (data, *op0_p);
1328       find_interesting_uses_op (data, *op1_p);
1329       return;
1330     }
1331
1332   if (zero_p (iv0->step) && zero_p (iv1->step))
1333     {
1334       /* If both are invariants, this is a work for unswitching.  */
1335       return;
1336     }
1337
1338   civ = xmalloc (sizeof (struct iv));
1339   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1340   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1341 }
1342
1343 /* Returns true if expression EXPR is obviously invariant in LOOP,
1344    i.e. if all its operands are defined outside of the LOOP.  */
1345
1346 bool
1347 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1348 {
1349   basic_block def_bb;
1350   unsigned i, len;
1351
1352   if (is_gimple_min_invariant (expr))
1353     return true;
1354
1355   if (TREE_CODE (expr) == SSA_NAME)
1356     {
1357       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1358       if (def_bb
1359           && flow_bb_inside_loop_p (loop, def_bb))
1360         return false;
1361
1362       return true;
1363     }
1364
1365   if (!EXPR_P (expr))
1366     return false;
1367
1368   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1369   for (i = 0; i < len; i++)
1370     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1371       return false;
1372
1373   return true;
1374 }
1375
1376 /* Cumulates the steps of indices into DATA and replaces their values with the
1377    initial ones.  Returns false when the value of the index cannot be determined.
1378    Callback for for_each_index.  */
1379
1380 struct ifs_ivopts_data
1381 {
1382   struct ivopts_data *ivopts_data;
1383   tree stmt;
1384   tree *step_p;
1385 };
1386
1387 static bool
1388 idx_find_step (tree base, tree *idx, void *data)
1389 {
1390   struct ifs_ivopts_data *dta = data;
1391   struct iv *iv;
1392   tree step, type, iv_type, iv_step, lbound, off;
1393   struct loop *loop = dta->ivopts_data->current_loop;
1394
1395   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1396       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1397     return false;
1398
1399   /* If base is a component ref, require that the offset of the reference
1400      be invariant.  */
1401   if (TREE_CODE (base) == COMPONENT_REF)
1402     {
1403       off = component_ref_field_offset (base);
1404       return expr_invariant_in_loop_p (loop, off);
1405     }
1406
1407   /* If base is array, first check whether we will be able to move the
1408      reference out of the loop (in order to take its address in strength
1409      reduction).  In order for this to work we need both lower bound
1410      and step to be loop invariants.  */
1411   if (TREE_CODE (base) == ARRAY_REF)
1412     {
1413       step = array_ref_element_size (base);
1414       lbound = array_ref_low_bound (base);
1415
1416       if (!expr_invariant_in_loop_p (loop, step)
1417           || !expr_invariant_in_loop_p (loop, lbound))
1418         return false;
1419     }
1420
1421   if (TREE_CODE (*idx) != SSA_NAME)
1422     return true;
1423
1424   iv = get_iv (dta->ivopts_data, *idx);
1425   if (!iv)
1426     return false;
1427
1428   *idx = iv->base;
1429
1430   if (!iv->step)
1431     return true;
1432
1433   iv_type = TREE_TYPE (iv->base);
1434   type = build_pointer_type (TREE_TYPE (base));
1435   if (TREE_CODE (base) == ARRAY_REF)
1436     {
1437       step = array_ref_element_size (base);
1438
1439       /* We only handle addresses whose step is an integer constant.  */
1440       if (TREE_CODE (step) != INTEGER_CST)
1441         return false;
1442     }
1443   else
1444     /* The step for pointer arithmetics already is 1 byte.  */
1445     step = build_int_cst (type, 1);
1446
1447   if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1448     iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1449                                           type, iv->base, iv->step, dta->stmt);
1450   else
1451     iv_step = fold_convert (iv_type, iv->step);
1452
1453   if (!iv_step)
1454     {
1455       /* The index might wrap.  */
1456       return false;
1457     }
1458
1459   step = fold_build2 (MULT_EXPR, type, step, iv_step);
1460
1461   if (!*dta->step_p)
1462     *dta->step_p = step;
1463   else
1464     *dta->step_p = fold_build2 (PLUS_EXPR, type, *dta->step_p, step);
1465
1466   return true;
1467 }
1468
1469 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1470    object is passed to it in DATA.  */
1471
1472 static bool
1473 idx_record_use (tree base, tree *idx,
1474                 void *data)
1475 {
1476   find_interesting_uses_op (data, *idx);
1477   if (TREE_CODE (base) == ARRAY_REF)
1478     {
1479       find_interesting_uses_op (data, array_ref_element_size (base));
1480       find_interesting_uses_op (data, array_ref_low_bound (base));
1481     }
1482   return true;
1483 }
1484
1485 /* Returns true if memory reference REF may be unaligned.  */
1486
1487 static bool
1488 may_be_unaligned_p (tree ref)
1489 {
1490   tree base;
1491   tree base_type;
1492   HOST_WIDE_INT bitsize;
1493   HOST_WIDE_INT bitpos;
1494   tree toffset;
1495   enum machine_mode mode;
1496   int unsignedp, volatilep;
1497   unsigned base_align;
1498
1499   /* The test below is basically copy of what expr.c:normal_inner_ref
1500      does to check whether the object must be loaded by parts when
1501      STRICT_ALIGNMENT is true.  */
1502   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1503                               &unsignedp, &volatilep, true);
1504   base_type = TREE_TYPE (base);
1505   base_align = TYPE_ALIGN (base_type);
1506
1507   if (mode != BLKmode
1508       && (base_align < GET_MODE_ALIGNMENT (mode)
1509           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1510           || bitpos % BITS_PER_UNIT != 0))
1511     return true;
1512
1513   return false;
1514 }
1515
1516 /* Builds ADDR_EXPR of object OBJ.  If OBJ is an INDIRECT_REF, the indirect_ref
1517    is stripped instead.  */
1518
1519 static tree
1520 build_addr_strip_iref (tree obj)
1521 {
1522   tree type;
1523
1524   if (TREE_CODE (obj) == INDIRECT_REF)
1525     {
1526       type = build_pointer_type (TREE_TYPE (obj));
1527       obj = fold_convert (type, TREE_OPERAND (obj, 0));
1528     }
1529   else
1530     obj = build_addr (obj, current_function_decl);
1531
1532   return obj;
1533 }
1534
1535 /* Finds addresses in *OP_P inside STMT.  */
1536
1537 static void
1538 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1539 {
1540   tree base = unshare_expr (*op_p), step = NULL;
1541   struct iv *civ;
1542   struct ifs_ivopts_data ifs_ivopts_data;
1543
1544   /* Do not play with volatile memory references.  A bit too conservative,
1545      perhaps, but safe.  */
1546   if (stmt_ann (stmt)->has_volatile_ops)
1547     goto fail;
1548
1549   /* Ignore bitfields for now.  Not really something terribly complicated
1550      to handle.  TODO.  */
1551   if (TREE_CODE (base) == COMPONENT_REF
1552       && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1553     goto fail;
1554
1555   if (STRICT_ALIGNMENT
1556       && may_be_unaligned_p (base))
1557     goto fail;
1558
1559   ifs_ivopts_data.ivopts_data = data;
1560   ifs_ivopts_data.stmt = stmt;
1561   ifs_ivopts_data.step_p = &step;
1562   if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1563       || zero_p (step))
1564     goto fail;
1565
1566   gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1567   gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1568
1569   base = build_addr_strip_iref (base);
1570
1571   civ = alloc_iv (base, step);
1572   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1573   return;
1574
1575 fail:
1576   for_each_index (op_p, idx_record_use, data);
1577 }
1578
1579 /* Finds and records invariants used in STMT.  */
1580
1581 static void
1582 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1583 {
1584   ssa_op_iter iter;
1585   use_operand_p use_p;
1586   tree op;
1587
1588   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1589     {
1590       op = USE_FROM_PTR (use_p);
1591       record_invariant (data, op, false);
1592     }
1593 }
1594
1595 /* Finds interesting uses of induction variables in the statement STMT.  */
1596
1597 static void
1598 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1599 {
1600   struct iv *iv;
1601   tree op, lhs, rhs;
1602   ssa_op_iter iter;
1603   use_operand_p use_p;
1604
1605   find_invariants_stmt (data, stmt);
1606
1607   if (TREE_CODE (stmt) == COND_EXPR)
1608     {
1609       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1610       return;
1611     }
1612
1613   if (TREE_CODE (stmt) == MODIFY_EXPR)
1614     {
1615       lhs = TREE_OPERAND (stmt, 0);
1616       rhs = TREE_OPERAND (stmt, 1);
1617
1618       if (TREE_CODE (lhs) == SSA_NAME)
1619         {
1620           /* If the statement defines an induction variable, the uses are not
1621              interesting by themselves.  */
1622
1623           iv = get_iv (data, lhs);
1624
1625           if (iv && !zero_p (iv->step))
1626             return;
1627         }
1628
1629       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1630         {
1631         case tcc_comparison:
1632           find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1633           return;
1634
1635         case tcc_reference:
1636           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1637           if (REFERENCE_CLASS_P (lhs))
1638             find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1639           return;
1640
1641         default: ;
1642         }
1643
1644       if (REFERENCE_CLASS_P (lhs)
1645           && is_gimple_val (rhs))
1646         {
1647           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1648           find_interesting_uses_op (data, rhs);
1649           return;
1650         }
1651
1652       /* TODO -- we should also handle address uses of type
1653
1654          memory = call (whatever);
1655
1656          and
1657
1658          call (memory).  */
1659     }
1660
1661   if (TREE_CODE (stmt) == PHI_NODE
1662       && bb_for_stmt (stmt) == data->current_loop->header)
1663     {
1664       lhs = PHI_RESULT (stmt);
1665       iv = get_iv (data, lhs);
1666
1667       if (iv && !zero_p (iv->step))
1668         return;
1669     }
1670
1671   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1672     {
1673       op = USE_FROM_PTR (use_p);
1674
1675       if (TREE_CODE (op) != SSA_NAME)
1676         continue;
1677
1678       iv = get_iv (data, op);
1679       if (!iv)
1680         continue;
1681
1682       find_interesting_uses_op (data, op);
1683     }
1684 }
1685
1686 /* Finds interesting uses of induction variables outside of loops
1687    on loop exit edge EXIT.  */
1688
1689 static void
1690 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1691 {
1692   tree phi, def;
1693
1694   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1695     {
1696       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1697       find_interesting_uses_outer (data, def);
1698     }
1699 }
1700
1701 /* Finds uses of the induction variables that are interesting.  */
1702
1703 static void
1704 find_interesting_uses (struct ivopts_data *data)
1705 {
1706   basic_block bb;
1707   block_stmt_iterator bsi;
1708   tree phi;
1709   basic_block *body = get_loop_body (data->current_loop);
1710   unsigned i;
1711   struct version_info *info;
1712   edge e;
1713
1714   if (dump_file && (dump_flags & TDF_DETAILS))
1715     fprintf (dump_file, "Uses:\n\n");
1716
1717   for (i = 0; i < data->current_loop->num_nodes; i++)
1718     {
1719       edge_iterator ei;
1720       bb = body[i];
1721
1722       FOR_EACH_EDGE (e, ei, bb->succs)
1723         if (e->dest != EXIT_BLOCK_PTR
1724             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1725           find_interesting_uses_outside (data, e);
1726
1727       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1728         find_interesting_uses_stmt (data, phi);
1729       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1730         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1731     }
1732
1733   if (dump_file && (dump_flags & TDF_DETAILS))
1734     {
1735       bitmap_iterator bi;
1736
1737       fprintf (dump_file, "\n");
1738
1739       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1740         {
1741           info = ver_info (data, i);
1742           if (info->inv_id)
1743             {
1744               fprintf (dump_file, "  ");
1745               print_generic_expr (dump_file, info->name, TDF_SLIM);
1746               fprintf (dump_file, " is invariant (%d)%s\n",
1747                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1748             }
1749         }
1750
1751       fprintf (dump_file, "\n");
1752     }
1753
1754   free (body);
1755 }
1756
1757 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1758    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1759    we are at the top-level of the processed address.  */
1760
1761 static tree
1762 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1763                 unsigned HOST_WIDE_INT *offset)
1764 {
1765   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1766   enum tree_code code;
1767   tree type, orig_type = TREE_TYPE (expr);
1768   unsigned HOST_WIDE_INT off0, off1, st;
1769   tree orig_expr = expr;
1770
1771   STRIP_NOPS (expr);
1772
1773   type = TREE_TYPE (expr);
1774   code = TREE_CODE (expr);
1775   *offset = 0;
1776
1777   switch (code)
1778     {
1779     case INTEGER_CST:
1780       if (!cst_and_fits_in_hwi (expr)
1781           || zero_p (expr))
1782         return orig_expr;
1783
1784       *offset = int_cst_value (expr);
1785       return build_int_cst_type (orig_type, 0);
1786
1787     case PLUS_EXPR:
1788     case MINUS_EXPR:
1789       op0 = TREE_OPERAND (expr, 0);
1790       op1 = TREE_OPERAND (expr, 1);
1791
1792       op0 = strip_offset_1 (op0, false, false, &off0);
1793       op1 = strip_offset_1 (op1, false, false, &off1);
1794
1795       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1796       if (op0 == TREE_OPERAND (expr, 0)
1797           && op1 == TREE_OPERAND (expr, 1))
1798         return orig_expr;
1799
1800       if (zero_p (op1))
1801         expr = op0;
1802       else if (zero_p (op0))
1803         {
1804           if (code == PLUS_EXPR)
1805             expr = op1;
1806           else
1807             expr = fold_build1 (NEGATE_EXPR, type, op1);
1808         }
1809       else
1810         expr = fold_build2 (code, type, op0, op1);
1811
1812       return fold_convert (orig_type, expr);
1813
1814     case ARRAY_REF:
1815       if (!inside_addr)
1816         return orig_expr;
1817
1818       step = array_ref_element_size (expr);
1819       if (!cst_and_fits_in_hwi (step))
1820         break;
1821
1822       st = int_cst_value (step);
1823       op1 = TREE_OPERAND (expr, 1);
1824       op1 = strip_offset_1 (op1, false, false, &off1);
1825       *offset = off1 * st;
1826
1827       if (top_compref
1828           && zero_p (op1))
1829         {
1830           /* Strip the component reference completely.  */
1831           op0 = TREE_OPERAND (expr, 0);
1832           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1833           *offset += off0;
1834           return op0;
1835         }
1836       break;
1837
1838     case COMPONENT_REF:
1839       if (!inside_addr)
1840         return orig_expr;
1841
1842       tmp = component_ref_field_offset (expr);
1843       if (top_compref
1844           && cst_and_fits_in_hwi (tmp))
1845         {
1846           /* Strip the component reference completely.  */
1847           op0 = TREE_OPERAND (expr, 0);
1848           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1849           *offset = off0 + int_cst_value (tmp);
1850           return op0;
1851         }
1852       break;
1853
1854     case ADDR_EXPR:
1855       op0 = TREE_OPERAND (expr, 0);
1856       op0 = strip_offset_1 (op0, true, true, &off0);
1857       *offset += off0;
1858
1859       if (op0 == TREE_OPERAND (expr, 0))
1860         return orig_expr;
1861
1862       expr = build_addr_strip_iref (op0);
1863       return fold_convert (orig_type, expr);
1864
1865     case INDIRECT_REF:
1866       inside_addr = false;
1867       break;
1868
1869     default:
1870       return orig_expr;
1871     }
1872
1873   /* Default handling of expressions for that we want to recurse into
1874      the first operand.  */
1875   op0 = TREE_OPERAND (expr, 0);
1876   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1877   *offset += off0;
1878
1879   if (op0 == TREE_OPERAND (expr, 0)
1880       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1881     return orig_expr;
1882
1883   expr = copy_node (expr);
1884   TREE_OPERAND (expr, 0) = op0;
1885   if (op1)
1886     TREE_OPERAND (expr, 1) = op1;
1887
1888   /* Inside address, we might strip the top level component references,
1889      thus changing type of the expression.  Handling of ADDR_EXPR
1890      will fix that.  */
1891   expr = fold_convert (orig_type, expr);
1892
1893   return expr;
1894 }
1895
1896 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1897
1898 static tree
1899 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1900 {
1901   return strip_offset_1 (expr, false, false, offset);
1902 }
1903
1904 /* Returns variant of TYPE that can be used as base for different uses.
1905    For integer types, we return unsigned variant of the type, which
1906    avoids problems with overflows.  For pointer types, we return void *.  */
1907
1908 static tree
1909 generic_type_for (tree type)
1910 {
1911   if (POINTER_TYPE_P (type))
1912     return ptr_type_node;
1913
1914   if (TYPE_UNSIGNED (type))
1915     return type;
1916
1917   return unsigned_type_for (type);
1918 }
1919
1920 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1921    the bitmap to that we should store it.  */
1922
1923 static struct ivopts_data *fd_ivopts_data;
1924 static tree
1925 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1926 {
1927   bitmap *depends_on = data;
1928   struct version_info *info;
1929
1930   if (TREE_CODE (*expr_p) != SSA_NAME)
1931     return NULL_TREE;
1932   info = name_info (fd_ivopts_data, *expr_p);
1933
1934   if (!info->inv_id || info->has_nonlin_use)
1935     return NULL_TREE;
1936
1937   if (!*depends_on)
1938     *depends_on = BITMAP_ALLOC (NULL);
1939   bitmap_set_bit (*depends_on, info->inv_id);
1940
1941   return NULL_TREE;
1942 }
1943
1944 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1945    position to POS.  If USE is not NULL, the candidate is set as related to
1946    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1947    replacement of the final value of the iv by a direct computation.  */
1948
1949 static struct iv_cand *
1950 add_candidate_1 (struct ivopts_data *data,
1951                  tree base, tree step, bool important, enum iv_position pos,
1952                  struct iv_use *use, tree incremented_at)
1953 {
1954   unsigned i;
1955   struct iv_cand *cand = NULL;
1956   tree type, orig_type;
1957
1958   if (base)
1959     {
1960       orig_type = TREE_TYPE (base);
1961       type = generic_type_for (orig_type);
1962       if (type != orig_type)
1963         {
1964           base = fold_convert (type, base);
1965           if (step)
1966             step = fold_convert (type, step);
1967         }
1968     }
1969
1970   for (i = 0; i < n_iv_cands (data); i++)
1971     {
1972       cand = iv_cand (data, i);
1973
1974       if (cand->pos != pos)
1975         continue;
1976
1977       if (cand->incremented_at != incremented_at)
1978         continue;
1979
1980       if (!cand->iv)
1981         {
1982           if (!base && !step)
1983             break;
1984
1985           continue;
1986         }
1987
1988       if (!base && !step)
1989         continue;
1990
1991       if (!operand_equal_p (base, cand->iv->base, 0))
1992         continue;
1993
1994       if (zero_p (cand->iv->step))
1995         {
1996           if (zero_p (step))
1997             break;
1998         }
1999       else
2000         {
2001           if (step && operand_equal_p (step, cand->iv->step, 0))
2002             break;
2003         }
2004     }
2005
2006   if (i == n_iv_cands (data))
2007     {
2008       cand = xcalloc (1, sizeof (struct iv_cand));
2009       cand->id = i;
2010
2011       if (!base && !step)
2012         cand->iv = NULL;
2013       else
2014         cand->iv = alloc_iv (base, step);
2015
2016       cand->pos = pos;
2017       if (pos != IP_ORIGINAL && cand->iv)
2018         {
2019           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2020           cand->var_after = cand->var_before;
2021         }
2022       cand->important = important;
2023       cand->incremented_at = incremented_at;
2024       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2025
2026       if (step
2027           && TREE_CODE (step) != INTEGER_CST)
2028         {
2029           fd_ivopts_data = data;
2030           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2031         }
2032
2033       if (dump_file && (dump_flags & TDF_DETAILS))
2034         dump_cand (dump_file, cand);
2035     }
2036
2037   if (important && !cand->important)
2038     {
2039       cand->important = true;
2040       if (dump_file && (dump_flags & TDF_DETAILS))
2041         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2042     }
2043
2044   if (use)
2045     {
2046       bitmap_set_bit (use->related_cands, i);
2047       if (dump_file && (dump_flags & TDF_DETAILS))
2048         fprintf (dump_file, "Candidate %d is related to use %d\n",
2049                  cand->id, use->id);
2050     }
2051
2052   return cand;
2053 }
2054
2055 /* Returns true if incrementing the induction variable at the end of the LOOP
2056    is allowed.
2057
2058    The purpose is to avoid splitting latch edge with a biv increment, thus
2059    creating a jump, possibly confusing other optimization passes and leaving
2060    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2061    is not available (so we do not have a better alternative), or if the latch
2062    edge is already nonempty.  */
2063
2064 static bool
2065 allow_ip_end_pos_p (struct loop *loop)
2066 {
2067   if (!ip_normal_pos (loop))
2068     return true;
2069
2070   if (!empty_block_p (ip_end_pos (loop)))
2071     return true;
2072
2073   return false;
2074 }
2075
2076 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2077    position to POS.  If USE is not NULL, the candidate is set as related to
2078    it.  The candidate computation is scheduled on all available positions.  */
2079
2080 static void
2081 add_candidate (struct ivopts_data *data,
2082                tree base, tree step, bool important, struct iv_use *use)
2083 {
2084   if (ip_normal_pos (data->current_loop))
2085     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2086   if (ip_end_pos (data->current_loop)
2087       && allow_ip_end_pos_p (data->current_loop))
2088     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2089 }
2090
2091 /* Add a standard "0 + 1 * iteration" iv candidate for a
2092    type with SIZE bits.  */
2093
2094 static void
2095 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2096                                      unsigned int size)
2097 {
2098   tree type = lang_hooks.types.type_for_size (size, true);
2099   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2100                  true, NULL);
2101 }
2102
2103 /* Adds standard iv candidates.  */
2104
2105 static void
2106 add_standard_iv_candidates (struct ivopts_data *data)
2107 {
2108   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2109
2110   /* The same for a double-integer type if it is still fast enough.  */
2111   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2112     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2113 }
2114
2115
2116 /* Adds candidates bases on the old induction variable IV.  */
2117
2118 static void
2119 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2120 {
2121   tree phi, def;
2122   struct iv_cand *cand;
2123
2124   add_candidate (data, iv->base, iv->step, true, NULL);
2125
2126   /* The same, but with initial value zero.  */
2127   add_candidate (data,
2128                  build_int_cst (TREE_TYPE (iv->base), 0),
2129                  iv->step, true, NULL);
2130
2131   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2132   if (TREE_CODE (phi) == PHI_NODE)
2133     {
2134       /* Additionally record the possibility of leaving the original iv
2135          untouched.  */
2136       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2137       cand = add_candidate_1 (data,
2138                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2139                               SSA_NAME_DEF_STMT (def));
2140       cand->var_before = iv->ssa_name;
2141       cand->var_after = def;
2142     }
2143 }
2144
2145 /* Adds candidates based on the old induction variables.  */
2146
2147 static void
2148 add_old_ivs_candidates (struct ivopts_data *data)
2149 {
2150   unsigned i;
2151   struct iv *iv;
2152   bitmap_iterator bi;
2153
2154   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2155     {
2156       iv = ver_info (data, i)->iv;
2157       if (iv && iv->biv_p && !zero_p (iv->step))
2158         add_old_iv_candidates (data, iv);
2159     }
2160 }
2161
2162 /* Adds candidates based on the value of the induction variable IV and USE.  */
2163
2164 static void
2165 add_iv_value_candidates (struct ivopts_data *data,
2166                          struct iv *iv, struct iv_use *use)
2167 {
2168   unsigned HOST_WIDE_INT offset;
2169   tree base;
2170
2171   add_candidate (data, iv->base, iv->step, false, use);
2172
2173   /* The same, but with initial value zero.  Make such variable important,
2174      since it is generic enough so that possibly many uses may be based
2175      on it.  */
2176   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2177                  iv->step, true, use);
2178
2179   /* Third, try removing the constant offset.  */
2180   base = strip_offset (iv->base, &offset);
2181   if (offset)
2182     add_candidate (data, base, iv->step, false, use);
2183 }
2184
2185 /* Possibly adds pseudocandidate for replacing the final value of USE by
2186    a direct computation.  */
2187
2188 static void
2189 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2190 {
2191   struct tree_niter_desc *niter;
2192
2193   /* We must know where we exit the loop and how many times does it roll.  */
2194   niter = niter_for_single_dom_exit (data);
2195   if (!niter
2196       || !zero_p (niter->may_be_zero))
2197     return;
2198
2199   add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2200 }
2201
2202 /* Adds candidates based on the uses.  */
2203
2204 static void
2205 add_derived_ivs_candidates (struct ivopts_data *data)
2206 {
2207   unsigned i;
2208
2209   for (i = 0; i < n_iv_uses (data); i++)
2210     {
2211       struct iv_use *use = iv_use (data, i);
2212
2213       if (!use)
2214         continue;
2215
2216       switch (use->type)
2217         {
2218         case USE_NONLINEAR_EXPR:
2219         case USE_COMPARE:
2220         case USE_ADDRESS:
2221           /* Just add the ivs based on the value of the iv used here.  */
2222           add_iv_value_candidates (data, use->iv, use);
2223           break;
2224
2225         case USE_OUTER:
2226           add_iv_value_candidates (data, use->iv, use);
2227
2228           /* Additionally, add the pseudocandidate for the possibility to
2229              replace the final value by a direct computation.  */
2230           add_iv_outer_candidates (data, use);
2231           break;
2232
2233         default:
2234           gcc_unreachable ();
2235         }
2236     }
2237 }
2238
2239 /* Record important candidates and add them to related_cands bitmaps
2240    if needed.  */
2241
2242 static void
2243 record_important_candidates (struct ivopts_data *data)
2244 {
2245   unsigned i;
2246   struct iv_use *use;
2247
2248   for (i = 0; i < n_iv_cands (data); i++)
2249     {
2250       struct iv_cand *cand = iv_cand (data, i);
2251
2252       if (cand->important)
2253         bitmap_set_bit (data->important_candidates, i);
2254     }
2255
2256   data->consider_all_candidates = (n_iv_cands (data)
2257                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2258
2259   if (data->consider_all_candidates)
2260     {
2261       /* We will not need "related_cands" bitmaps in this case,
2262          so release them to decrease peak memory consumption.  */
2263       for (i = 0; i < n_iv_uses (data); i++)
2264         {
2265           use = iv_use (data, i);
2266           BITMAP_FREE (use->related_cands);
2267         }
2268     }
2269   else
2270     {
2271       /* Add important candidates to the related_cands bitmaps.  */
2272       for (i = 0; i < n_iv_uses (data); i++)
2273         bitmap_ior_into (iv_use (data, i)->related_cands,
2274                          data->important_candidates);
2275     }
2276 }
2277
2278 /* Finds the candidates for the induction variables.  */
2279
2280 static void
2281 find_iv_candidates (struct ivopts_data *data)
2282 {
2283   /* Add commonly used ivs.  */
2284   add_standard_iv_candidates (data);
2285
2286   /* Add old induction variables.  */
2287   add_old_ivs_candidates (data);
2288
2289   /* Add induction variables derived from uses.  */
2290   add_derived_ivs_candidates (data);
2291
2292   /* Record the important candidates.  */
2293   record_important_candidates (data);
2294 }
2295
2296 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2297    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2298    we allocate a simple list to every use.  */
2299
2300 static void
2301 alloc_use_cost_map (struct ivopts_data *data)
2302 {
2303   unsigned i, size, s, j;
2304
2305   for (i = 0; i < n_iv_uses (data); i++)
2306     {
2307       struct iv_use *use = iv_use (data, i);
2308       bitmap_iterator bi;
2309
2310       if (data->consider_all_candidates)
2311         size = n_iv_cands (data);
2312       else
2313         {
2314           s = 0;
2315           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2316             {
2317               s++;
2318             }
2319
2320           /* Round up to the power of two, so that moduling by it is fast.  */
2321           for (size = 1; size < s; size <<= 1)
2322             continue;
2323         }
2324
2325       use->n_map_members = size;
2326       use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2327     }
2328 }
2329
2330 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2331    on invariants DEPENDS_ON and that the value used in expressing it
2332    is VALUE.*/
2333
2334 static void
2335 set_use_iv_cost (struct ivopts_data *data,
2336                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2337                  bitmap depends_on, tree value)
2338 {
2339   unsigned i, s;
2340
2341   if (cost == INFTY)
2342     {
2343       BITMAP_FREE (depends_on);
2344       return;
2345     }
2346
2347   if (data->consider_all_candidates)
2348     {
2349       use->cost_map[cand->id].cand = cand;
2350       use->cost_map[cand->id].cost = cost;
2351       use->cost_map[cand->id].depends_on = depends_on;
2352       use->cost_map[cand->id].value = value;
2353       return;
2354     }
2355
2356   /* n_map_members is a power of two, so this computes modulo.  */
2357   s = cand->id & (use->n_map_members - 1);
2358   for (i = s; i < use->n_map_members; i++)
2359     if (!use->cost_map[i].cand)
2360       goto found;
2361   for (i = 0; i < s; i++)
2362     if (!use->cost_map[i].cand)
2363       goto found;
2364
2365   gcc_unreachable ();
2366
2367 found:
2368   use->cost_map[i].cand = cand;
2369   use->cost_map[i].cost = cost;
2370   use->cost_map[i].depends_on = depends_on;
2371   use->cost_map[i].value = value;
2372 }
2373
2374 /* Gets cost of (USE, CANDIDATE) pair.  */
2375
2376 static struct cost_pair *
2377 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2378                  struct iv_cand *cand)
2379 {
2380   unsigned i, s;
2381   struct cost_pair *ret;
2382
2383   if (!cand)
2384     return NULL;
2385
2386   if (data->consider_all_candidates)
2387     {
2388       ret = use->cost_map + cand->id;
2389       if (!ret->cand)
2390         return NULL;
2391
2392       return ret;
2393     }
2394
2395   /* n_map_members is a power of two, so this computes modulo.  */
2396   s = cand->id & (use->n_map_members - 1);
2397   for (i = s; i < use->n_map_members; i++)
2398     if (use->cost_map[i].cand == cand)
2399       return use->cost_map + i;
2400
2401   for (i = 0; i < s; i++)
2402     if (use->cost_map[i].cand == cand)
2403       return use->cost_map + i;
2404
2405   return NULL;
2406 }
2407
2408 /* Returns estimate on cost of computing SEQ.  */
2409
2410 static unsigned
2411 seq_cost (rtx seq)
2412 {
2413   unsigned cost = 0;
2414   rtx set;
2415
2416   for (; seq; seq = NEXT_INSN (seq))
2417     {
2418       set = single_set (seq);
2419       if (set)
2420         cost += rtx_cost (set, SET);
2421       else
2422         cost++;
2423     }
2424
2425   return cost;
2426 }
2427
2428 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2429 static rtx
2430 produce_memory_decl_rtl (tree obj, int *regno)
2431 {
2432   rtx x;
2433
2434   gcc_assert (obj);
2435   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2436     {
2437       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2438       x = gen_rtx_SYMBOL_REF (Pmode, name);
2439     }
2440   else
2441     x = gen_raw_REG (Pmode, (*regno)++);
2442
2443   return gen_rtx_MEM (DECL_MODE (obj), x);
2444 }
2445
2446 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2447    walk_tree.  DATA contains the actual fake register number.  */
2448
2449 static tree
2450 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2451 {
2452   tree obj = NULL_TREE;
2453   rtx x = NULL_RTX;
2454   int *regno = data;
2455
2456   switch (TREE_CODE (*expr_p))
2457     {
2458     case ADDR_EXPR:
2459       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2460            handled_component_p (*expr_p);
2461            expr_p = &TREE_OPERAND (*expr_p, 0))
2462         continue;
2463       obj = *expr_p;
2464       if (DECL_P (obj))
2465         x = produce_memory_decl_rtl (obj, regno);
2466       break;
2467
2468     case SSA_NAME:
2469       *ws = 0;
2470       obj = SSA_NAME_VAR (*expr_p);
2471       if (!DECL_RTL_SET_P (obj))
2472         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2473       break;
2474
2475     case VAR_DECL:
2476     case PARM_DECL:
2477     case RESULT_DECL:
2478       *ws = 0;
2479       obj = *expr_p;
2480
2481       if (DECL_RTL_SET_P (obj))
2482         break;
2483
2484       if (DECL_MODE (obj) == BLKmode)
2485         x = produce_memory_decl_rtl (obj, regno);
2486       else
2487         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2488
2489       break;
2490
2491     default:
2492       break;
2493     }
2494
2495   if (x)
2496     {
2497       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2498       SET_DECL_RTL (obj, x);
2499     }
2500
2501   return NULL_TREE;
2502 }
2503
2504 /* Determines cost of the computation of EXPR.  */
2505
2506 static unsigned
2507 computation_cost (tree expr)
2508 {
2509   rtx seq, rslt;
2510   tree type = TREE_TYPE (expr);
2511   unsigned cost;
2512   /* Avoid using hard regs in ways which may be unsupported.  */
2513   int regno = LAST_VIRTUAL_REGISTER + 1;
2514
2515   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2516   start_sequence ();
2517   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2518   seq = get_insns ();
2519   end_sequence ();
2520
2521   cost = seq_cost (seq);
2522   if (MEM_P (rslt))
2523     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2524
2525   return cost;
2526 }
2527
2528 /* Returns variable containing the value of candidate CAND at statement AT.  */
2529
2530 static tree
2531 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2532 {
2533   if (stmt_after_increment (loop, cand, stmt))
2534     return cand->var_after;
2535   else
2536     return cand->var_before;
2537 }
2538
2539 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2540    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2541
2542 int
2543 tree_int_cst_sign_bit (tree t)
2544 {
2545   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2546   unsigned HOST_WIDE_INT w;
2547
2548   if (bitno < HOST_BITS_PER_WIDE_INT)
2549     w = TREE_INT_CST_LOW (t);
2550   else
2551     {
2552       w = TREE_INT_CST_HIGH (t);
2553       bitno -= HOST_BITS_PER_WIDE_INT;
2554     }
2555
2556   return (w >> bitno) & 1;
2557 }
2558
2559 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2560    return cst.  Otherwise return NULL_TREE.  */
2561
2562 static tree
2563 constant_multiple_of (tree type, tree top, tree bot)
2564 {
2565   tree res, mby, p0, p1;
2566   enum tree_code code;
2567   bool negate;
2568
2569   STRIP_NOPS (top);
2570   STRIP_NOPS (bot);
2571
2572   if (operand_equal_p (top, bot, 0))
2573     return build_int_cst (type, 1);
2574
2575   code = TREE_CODE (top);
2576   switch (code)
2577     {
2578     case MULT_EXPR:
2579       mby = TREE_OPERAND (top, 1);
2580       if (TREE_CODE (mby) != INTEGER_CST)
2581         return NULL_TREE;
2582
2583       res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2584       if (!res)
2585         return NULL_TREE;
2586
2587       return fold_binary_to_constant (MULT_EXPR, type, res,
2588                                       fold_convert (type, mby));
2589
2590     case PLUS_EXPR:
2591     case MINUS_EXPR:
2592       p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2593       if (!p0)
2594         return NULL_TREE;
2595       p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2596       if (!p1)
2597         return NULL_TREE;
2598
2599       return fold_binary_to_constant (code, type, p0, p1);
2600
2601     case INTEGER_CST:
2602       if (TREE_CODE (bot) != INTEGER_CST)
2603         return NULL_TREE;
2604
2605       bot = fold_convert (type, bot);
2606       top = fold_convert (type, top);
2607
2608       /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2609          the result afterwards.  */
2610       if (tree_int_cst_sign_bit (bot))
2611         {
2612           negate = true;
2613           bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2614         }
2615       else
2616         negate = false;
2617
2618       /* Ditto for TOP.  */
2619       if (tree_int_cst_sign_bit (top))
2620         {
2621           negate = !negate;
2622           top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2623         }
2624
2625       if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2626         return NULL_TREE;
2627
2628       res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2629       if (negate)
2630         res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2631       return res;
2632
2633     default:
2634       return NULL_TREE;
2635     }
2636 }
2637
2638 /* Affine combination of trees.  We keep track of at most MAX_AFF_ELTS elements
2639    to make things simpler; this is sufficient in most cases.  */
2640
2641 #define MAX_AFF_ELTS 8
2642
2643 struct affine_tree_combination
2644 {
2645   /* Type of the result of the combination.  */
2646   tree type;
2647
2648   /* Mask modulo that the operations are performed.  */
2649   unsigned HOST_WIDE_INT mask;
2650
2651   /* Constant offset.  */
2652   unsigned HOST_WIDE_INT offset;
2653
2654   /* Number of elements of the combination.  */
2655   unsigned n;
2656
2657   /* Elements and their coefficients.  */
2658   tree elts[MAX_AFF_ELTS];
2659   unsigned HOST_WIDE_INT coefs[MAX_AFF_ELTS];
2660
2661   /* Remainder of the expression.  */
2662   tree rest;
2663 };
2664
2665 /* Sets COMB to CST.  */
2666
2667 static void
2668 aff_combination_const (struct affine_tree_combination *comb, tree type,
2669                        unsigned HOST_WIDE_INT cst)
2670 {
2671   unsigned prec = TYPE_PRECISION (type);
2672
2673   comb->type = type;
2674   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2675
2676   comb->n = 0;
2677   comb->rest = NULL_TREE;
2678   comb->offset = cst & comb->mask;
2679 }
2680
2681 /* Sets COMB to single element ELT.  */
2682
2683 static void
2684 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2685 {
2686   unsigned prec = TYPE_PRECISION (type);
2687
2688   comb->type = type;
2689   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2690
2691   comb->n = 1;
2692   comb->elts[0] = elt;
2693   comb->coefs[0] = 1;
2694   comb->rest = NULL_TREE;
2695   comb->offset = 0;
2696 }
2697
2698 /* Scales COMB by SCALE.  */
2699
2700 static void
2701 aff_combination_scale (struct affine_tree_combination *comb,
2702                        unsigned HOST_WIDE_INT scale)
2703 {
2704   unsigned i, j;
2705
2706   if (scale == 1)
2707     return;
2708
2709   if (scale == 0)
2710     {
2711       aff_combination_const (comb, comb->type, 0);
2712       return;
2713     }
2714
2715   comb->offset = (scale * comb->offset) & comb->mask;
2716   for (i = 0, j = 0; i < comb->n; i++)
2717     {
2718       comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2719       comb->elts[j] = comb->elts[i];
2720       if (comb->coefs[j] != 0)
2721         j++;
2722     }
2723   comb->n = j;
2724
2725   if (comb->rest)
2726     {
2727       if (comb->n < MAX_AFF_ELTS)
2728         {
2729           comb->coefs[comb->n] = scale;
2730           comb->elts[comb->n] = comb->rest;
2731           comb->rest = NULL_TREE;
2732           comb->n++;
2733         }
2734       else
2735         comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2736                                   build_int_cst_type (comb->type, scale));
2737     }
2738 }
2739
2740 /* Adds ELT * SCALE to COMB.  */
2741
2742 static void
2743 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2744                          unsigned HOST_WIDE_INT scale)
2745 {
2746   unsigned i;
2747
2748   if (scale == 0)
2749     return;
2750
2751   for (i = 0; i < comb->n; i++)
2752     if (operand_equal_p (comb->elts[i], elt, 0))
2753       {
2754         comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2755         if (comb->coefs[i])
2756           return;
2757
2758         comb->n--;
2759         comb->coefs[i] = comb->coefs[comb->n];
2760         comb->elts[i] = comb->elts[comb->n];
2761         return;
2762       }
2763   if (comb->n < MAX_AFF_ELTS)
2764     {
2765       comb->coefs[comb->n] = scale;
2766       comb->elts[comb->n] = elt;
2767       comb->n++;
2768       return;
2769     }
2770
2771   if (scale == 1)
2772     elt = fold_convert (comb->type, elt);
2773   else
2774     elt = fold_build2 (MULT_EXPR, comb->type,
2775                        fold_convert (comb->type, elt),
2776                        build_int_cst_type (comb->type, scale));
2777
2778   if (comb->rest)
2779     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2780   else
2781     comb->rest = elt;
2782 }
2783
2784 /* Adds COMB2 to COMB1.  */
2785
2786 static void
2787 aff_combination_add (struct affine_tree_combination *comb1,
2788                      struct affine_tree_combination *comb2)
2789 {
2790   unsigned i;
2791
2792   comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2793   for (i = 0; i < comb2-> n; i++)
2794     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2795   if (comb2->rest)
2796     aff_combination_add_elt (comb1, comb2->rest, 1);
2797 }
2798
2799 /* Splits EXPR into an affine combination of parts.  */
2800
2801 static void
2802 tree_to_aff_combination (tree expr, tree type,
2803                          struct affine_tree_combination *comb)
2804 {
2805   struct affine_tree_combination tmp;
2806   enum tree_code code;
2807   tree cst, core, toffset;
2808   HOST_WIDE_INT bitpos, bitsize;
2809   enum machine_mode mode;
2810   int unsignedp, volatilep;
2811
2812   STRIP_NOPS (expr);
2813
2814   code = TREE_CODE (expr);
2815   switch (code)
2816     {
2817     case INTEGER_CST:
2818       aff_combination_const (comb, type, int_cst_value (expr));
2819       return;
2820
2821     case PLUS_EXPR:
2822     case MINUS_EXPR:
2823       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2824       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2825       if (code == MINUS_EXPR)
2826         aff_combination_scale (&tmp, -1);
2827       aff_combination_add (comb, &tmp);
2828       return;
2829
2830     case MULT_EXPR:
2831       cst = TREE_OPERAND (expr, 1);
2832       if (TREE_CODE (cst) != INTEGER_CST)
2833         break;
2834       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2835       aff_combination_scale (comb, int_cst_value (cst));
2836       return;
2837
2838     case NEGATE_EXPR:
2839       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2840       aff_combination_scale (comb, -1);
2841       return;
2842
2843     case ADDR_EXPR:
2844       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2845                                   &toffset, &mode, &unsignedp, &volatilep,
2846                                   false);
2847       if (bitpos % BITS_PER_UNIT != 0)
2848         break;
2849       aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2850       core = build_addr_strip_iref (core);
2851       if (TREE_CODE (core) == ADDR_EXPR)
2852         aff_combination_add_elt (comb, core, 1);
2853       else
2854         {
2855           tree_to_aff_combination (core, type, &tmp);
2856           aff_combination_add (comb, &tmp);
2857         }
2858       if (toffset)
2859         {
2860           tree_to_aff_combination (toffset, type, &tmp);
2861           aff_combination_add (comb, &tmp);
2862         }
2863       return;
2864
2865     default:
2866       break;
2867     }
2868
2869   aff_combination_elt (comb, type, expr);
2870 }
2871
2872 /* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
2873
2874 static tree
2875 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2876                  unsigned HOST_WIDE_INT mask)
2877 {
2878   enum tree_code code;
2879
2880   scale &= mask;
2881   elt = fold_convert (type, elt);
2882
2883   if (scale == 1)
2884     {
2885       if (!expr)
2886         return elt;
2887
2888       return fold_build2 (PLUS_EXPR, type, expr, elt);
2889     }
2890
2891   if (scale == mask)
2892     {
2893       if (!expr)
2894         return fold_build1 (NEGATE_EXPR, type, elt);
2895
2896       return fold_build2 (MINUS_EXPR, type, expr, elt);
2897     }
2898
2899   if (!expr)
2900     return fold_build2 (MULT_EXPR, type, elt,
2901                         build_int_cst_type (type, scale));
2902
2903   if ((scale | (mask >> 1)) == mask)
2904     {
2905       /* Scale is negative.  */
2906       code = MINUS_EXPR;
2907       scale = (-scale) & mask;
2908     }
2909   else
2910     code = PLUS_EXPR;
2911
2912   elt = fold_build2 (MULT_EXPR, type, elt,
2913                      build_int_cst_type (type, scale));
2914   return fold_build2 (code, type, expr, elt);
2915 }
2916
2917 /* Makes tree from the affine combination COMB.  */
2918
2919 static tree
2920 aff_combination_to_tree (struct affine_tree_combination *comb)
2921 {
2922   tree type = comb->type;
2923   tree expr = comb->rest;
2924   unsigned i;
2925   unsigned HOST_WIDE_INT off, sgn;
2926
2927   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2928
2929   for (i = 0; i < comb->n; i++)
2930     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2931                             comb->mask);
2932
2933   if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2934     {
2935       /* Offset is negative.  */
2936       off = (-comb->offset) & comb->mask;
2937       sgn = comb->mask;
2938     }
2939   else
2940     {
2941       off = comb->offset;
2942       sgn = 1;
2943     }
2944   return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2945                           comb->mask);
2946 }
2947
2948 /* Folds X + RATIO * Y in TYPE.  */
2949
2950 static tree
2951 fold_affine_sum (tree type, tree x, tree y, HOST_WIDE_INT ratio)
2952 {
2953   enum tree_code code;
2954   tree cst;
2955   struct affine_tree_combination cx, cy;
2956
2957   if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2958     {
2959       if (ratio == 1)
2960         return fold_build2 (PLUS_EXPR, type, x, y);
2961       if (ratio == -1)
2962         return fold_build2 (MINUS_EXPR, type, x, y);
2963
2964       if (ratio < 0)
2965         {
2966           code = MINUS_EXPR;
2967           ratio = -ratio;
2968         }
2969       else
2970         code = PLUS_EXPR;
2971
2972       cst = build_int_cst_type (type, ratio);
2973       y = fold_build2 (MULT_EXPR, type, y, cst);
2974       return fold_build2 (code, type, x, y);
2975     }
2976
2977   tree_to_aff_combination (x, type, &cx);
2978   tree_to_aff_combination (y, type, &cy);
2979   aff_combination_scale (&cy, ratio);
2980   aff_combination_add (&cx, &cy);
2981
2982   return aff_combination_to_tree (&cx);
2983 }
2984
2985 /* Determines the expression by that USE is expressed from induction variable
2986    CAND at statement AT in LOOP.  */
2987
2988 static tree
2989 get_computation_at (struct loop *loop,
2990                     struct iv_use *use, struct iv_cand *cand, tree at)
2991 {
2992   tree ubase = use->iv->base;
2993   tree ustep = use->iv->step;
2994   tree cbase = cand->iv->base;
2995   tree cstep = cand->iv->step;
2996   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2997   tree uutype;
2998   tree expr, delta;
2999   tree ratio;
3000   unsigned HOST_WIDE_INT ustepi, cstepi;
3001   HOST_WIDE_INT ratioi;
3002
3003   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3004     {
3005       /* We do not have a precision to express the values of use.  */
3006       return NULL_TREE;
3007     }
3008
3009   expr = var_at_stmt (loop, cand, at);
3010
3011   if (TREE_TYPE (expr) != ctype)
3012     {
3013       /* This may happen with the original ivs.  */
3014       expr = fold_convert (ctype, expr);
3015     }
3016
3017   if (TYPE_UNSIGNED (utype))
3018     uutype = utype;
3019   else
3020     {
3021       uutype = unsigned_type_for (utype);
3022       ubase = fold_convert (uutype, ubase);
3023       ustep = fold_convert (uutype, ustep);
3024     }
3025
3026   if (uutype != ctype)
3027     {
3028       expr = fold_convert (uutype, expr);
3029       cbase = fold_convert (uutype, cbase);
3030       cstep = fold_convert (uutype, cstep);
3031     }
3032
3033   if (cst_and_fits_in_hwi (cstep)
3034       && cst_and_fits_in_hwi (ustep))
3035     {
3036       ustepi = int_cst_value (ustep);
3037       cstepi = int_cst_value (cstep);
3038
3039       if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3040         {
3041           /* TODO maybe consider case when ustep divides cstep and the ratio is
3042              a power of 2 (so that the division is fast to execute)?  We would
3043              need to be much more careful with overflows etc. then.  */
3044           return NULL_TREE;
3045         }
3046
3047       ratio = build_int_cst_type (uutype, ratioi);
3048     }
3049   else
3050     {
3051       ratio = constant_multiple_of (uutype, ustep, cstep);
3052       if (!ratio)
3053         return NULL_TREE;
3054
3055       /* Ratioi is only used to detect special cases when the multiplicative
3056          factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3057          we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
3058          to integer_onep/integer_all_onesp, since the former ignores
3059          TREE_OVERFLOW.  */
3060       if (cst_and_fits_in_hwi (ratio))
3061         ratioi = int_cst_value (ratio);
3062       else if (integer_onep (ratio))
3063         ratioi = 1;
3064       else if (integer_all_onesp (ratio))
3065         ratioi = -1;
3066       else
3067         ratioi = 0;
3068     }
3069
3070   /* We may need to shift the value if we are after the increment.  */
3071   if (stmt_after_increment (loop, cand, at))
3072     cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
3073
3074   /* use = ubase - ratio * cbase + ratio * var.
3075
3076      In general case ubase + ratio * (var - cbase) could be better (one less
3077      multiplication), but often it is possible to eliminate redundant parts
3078      of computations from (ubase - ratio * cbase) term, and if it does not
3079      happen, fold is able to apply the distributive law to obtain this form
3080      anyway.  */
3081
3082   if (ratioi == 1)
3083     {
3084       delta = fold_affine_sum (uutype, ubase, cbase, -1);
3085       expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3086     }
3087   else if (ratioi == -1)
3088     {
3089       delta = fold_affine_sum (uutype, ubase, cbase, 1);
3090       expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3091     }
3092   else
3093     {
3094       if (ratioi)
3095         delta = fold_affine_sum (uutype, ubase, cbase, -ratioi);
3096       else
3097         {
3098           delta = fold_build2 (MULT_EXPR, uutype, ratio, cbase);
3099           delta = fold_affine_sum (uutype, ubase, delta, -1);
3100         }
3101       expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3102       expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3103     }
3104
3105   return fold_convert (utype, expr);
3106 }
3107
3108 /* Determines the expression by that USE is expressed from induction variable
3109    CAND in LOOP.  */
3110
3111 static tree
3112 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3113 {
3114   return get_computation_at (loop, use, cand, use->stmt);
3115 }
3116
3117 /* Returns cost of addition in MODE.  */
3118
3119 static unsigned
3120 add_cost (enum machine_mode mode)
3121 {
3122   static unsigned costs[NUM_MACHINE_MODES];
3123   rtx seq;
3124   unsigned cost;
3125
3126   if (costs[mode])
3127     return costs[mode];
3128
3129   start_sequence ();
3130   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3131                                  gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
3132                                  gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
3133                  NULL_RTX);
3134   seq = get_insns ();
3135   end_sequence ();
3136
3137   cost = seq_cost (seq);
3138   if (!cost)
3139     cost = 1;
3140
3141   costs[mode] = cost;
3142
3143   if (dump_file && (dump_flags & TDF_DETAILS))
3144     fprintf (dump_file, "Addition in %s costs %d\n",
3145              GET_MODE_NAME (mode), cost);
3146   return cost;
3147 }
3148
3149 /* Entry in a hashtable of already known costs for multiplication.  */
3150 struct mbc_entry
3151 {
3152   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3153   enum machine_mode mode;       /* In mode.  */
3154   unsigned cost;                /* The cost.  */
3155 };
3156
3157 /* Counts hash value for the ENTRY.  */
3158
3159 static hashval_t
3160 mbc_entry_hash (const void *entry)
3161 {
3162   const struct mbc_entry *e = entry;
3163
3164   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3165 }
3166
3167 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3168
3169 static int
3170 mbc_entry_eq (const void *entry1, const void *entry2)
3171 {
3172   const struct mbc_entry *e1 = entry1;
3173   const struct mbc_entry *e2 = entry2;
3174
3175   return (e1->mode == e2->mode
3176           && e1->cst == e2->cst);
3177 }
3178
3179 /* Returns cost of multiplication by constant CST in MODE.  */
3180
3181 static unsigned
3182 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3183 {
3184   static htab_t costs;
3185   struct mbc_entry **cached, act;
3186   rtx seq;
3187   unsigned cost;
3188
3189   if (!costs)
3190     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3191
3192   act.mode = mode;
3193   act.cst = cst;
3194   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3195   if (*cached)
3196     return (*cached)->cost;
3197
3198   *cached = xmalloc (sizeof (struct mbc_entry));
3199   (*cached)->mode = mode;
3200   (*cached)->cst = cst;
3201
3202   start_sequence ();
3203   expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
3204                NULL_RTX, 0);
3205   seq = get_insns ();
3206   end_sequence ();
3207
3208   cost = seq_cost (seq);
3209
3210   if (dump_file && (dump_flags & TDF_DETAILS))
3211     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3212              (int) cst, GET_MODE_NAME (mode), cost);
3213
3214   (*cached)->cost = cost;
3215
3216   return cost;
3217 }
3218
3219 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3220    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3221    variable is omitted.  The created memory accesses MODE.
3222
3223    TODO -- there must be some better way.  This all is quite crude.  */
3224
3225 static unsigned
3226 get_address_cost (bool symbol_present, bool var_present,
3227                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3228 {
3229 #define MAX_RATIO 128
3230   static sbitmap valid_mult;
3231   static HOST_WIDE_INT rat, off;
3232   static HOST_WIDE_INT min_offset, max_offset;
3233   static unsigned costs[2][2][2][2];
3234   unsigned cost, acost;
3235   rtx seq, addr, base;
3236   bool offset_p, ratio_p;
3237   rtx reg1;
3238   HOST_WIDE_INT s_offset;
3239   unsigned HOST_WIDE_INT mask;
3240   unsigned bits;
3241
3242   if (!valid_mult)
3243     {
3244       HOST_WIDE_INT i;
3245
3246       reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3247
3248       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3249       for (i = 1; i <= 1 << 20; i <<= 1)
3250         {
3251           XEXP (addr, 1) = GEN_INT (i);
3252           if (!memory_address_p (Pmode, addr))
3253             break;
3254         }
3255       max_offset = i >> 1;
3256       off = max_offset;
3257
3258       for (i = 1; i <= 1 << 20; i <<= 1)
3259         {
3260           XEXP (addr, 1) = GEN_INT (-i);
3261           if (!memory_address_p (Pmode, addr))
3262             break;
3263         }
3264       min_offset = -(i >> 1);
3265
3266       if (dump_file && (dump_flags & TDF_DETAILS))
3267         {
3268           fprintf (dump_file, "get_address_cost:\n");
3269           fprintf (dump_file, "  min offset %d\n", (int) min_offset);
3270           fprintf (dump_file, "  max offset %d\n", (int) max_offset);
3271         }
3272
3273       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3274       sbitmap_zero (valid_mult);
3275       rat = 1;
3276       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3277       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3278         {
3279           XEXP (addr, 1) = GEN_INT (i);
3280           if (memory_address_p (Pmode, addr))
3281             {
3282               SET_BIT (valid_mult, i + MAX_RATIO);
3283               rat = i;
3284             }
3285         }
3286
3287       if (dump_file && (dump_flags & TDF_DETAILS))
3288         {
3289           fprintf (dump_file, "  allowed multipliers:");
3290           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3291             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3292               fprintf (dump_file, " %d", (int) i);
3293           fprintf (dump_file, "\n");
3294           fprintf (dump_file, "\n");
3295         }
3296     }
3297
3298   bits = GET_MODE_BITSIZE (Pmode);
3299   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3300   offset &= mask;
3301   if ((offset >> (bits - 1) & 1))
3302     offset |= ~mask;
3303   s_offset = offset;
3304
3305   cost = 0;
3306   offset_p = (s_offset != 0
3307               && min_offset <= s_offset && s_offset <= max_offset);
3308   ratio_p = (ratio != 1
3309              && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
3310              && TEST_BIT (valid_mult, ratio + MAX_RATIO));
3311
3312   if (ratio != 1 && !ratio_p)
3313     cost += multiply_by_cost (ratio, Pmode);
3314
3315   if (s_offset && !offset_p && !symbol_present)
3316     {
3317       cost += add_cost (Pmode);
3318       var_present = true;
3319     }
3320
3321   acost = costs[symbol_present][var_present][offset_p][ratio_p];
3322   if (!acost)
3323     {
3324       acost = 0;
3325
3326       addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3327       reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
3328       if (ratio_p)
3329         addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
3330
3331       if (var_present)
3332         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3333
3334       if (symbol_present)
3335         {
3336           base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3337           if (offset_p)
3338             base = gen_rtx_fmt_e (CONST, Pmode,
3339                                   gen_rtx_fmt_ee (PLUS, Pmode,
3340                                                   base,
3341                                                   GEN_INT (off)));
3342         }
3343       else if (offset_p)
3344         base = GEN_INT (off);
3345       else
3346         base = NULL_RTX;
3347
3348       if (base)
3349         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3350
3351       start_sequence ();
3352       addr = memory_address (Pmode, addr);
3353       seq = get_insns ();
3354       end_sequence ();
3355
3356       acost = seq_cost (seq);
3357       acost += address_cost (addr, Pmode);
3358
3359       if (!acost)
3360         acost = 1;
3361       costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3362     }
3363
3364   return cost + acost;
3365 }
3366 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3367    invariants the computation depends on.  */
3368
3369 static unsigned
3370 force_var_cost (struct ivopts_data *data,
3371                 tree expr, bitmap *depends_on)
3372 {
3373   static bool costs_initialized = false;
3374   static unsigned integer_cost;
3375   static unsigned symbol_cost;
3376   static unsigned address_cost;
3377   tree op0, op1;
3378   unsigned cost0, cost1, cost;
3379   enum machine_mode mode;
3380
3381   if (!costs_initialized)
3382     {
3383       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3384       rtx x = gen_rtx_MEM (DECL_MODE (var),
3385                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3386       tree addr;
3387       tree type = build_pointer_type (integer_type_node);
3388
3389       integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3390                                                            2000));
3391
3392       SET_DECL_RTL (var, x);
3393       TREE_STATIC (var) = 1;
3394       addr = build1 (ADDR_EXPR, type, var);
3395       symbol_cost = computation_cost (addr) + 1;
3396
3397       address_cost
3398         = computation_cost (build2 (PLUS_EXPR, type,
3399                                     addr,
3400                                     build_int_cst_type (type, 2000))) + 1;
3401       if (dump_file && (dump_flags & TDF_DETAILS))
3402         {
3403           fprintf (dump_file, "force_var_cost:\n");
3404           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3405           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3406           fprintf (dump_file, "  address %d\n", (int) address_cost);
3407           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3408           fprintf (dump_file, "\n");
3409         }
3410
3411       costs_initialized = true;
3412     }
3413
3414   STRIP_NOPS (expr);
3415
3416   if (depends_on)
3417     {
3418       fd_ivopts_data = data;
3419       walk_tree (&expr, find_depends, depends_on, NULL);
3420     }
3421
3422   if (SSA_VAR_P (expr))
3423     return 0;
3424
3425   if (TREE_INVARIANT (expr))
3426     {
3427       if (TREE_CODE (expr) == INTEGER_CST)
3428         return integer_cost;
3429
3430       if (TREE_CODE (expr) == ADDR_EXPR)
3431         {
3432           tree obj = TREE_OPERAND (expr, 0);
3433
3434           if (TREE_CODE (obj) == VAR_DECL
3435               || TREE_CODE (obj) == PARM_DECL
3436               || TREE_CODE (obj) == RESULT_DECL)
3437             return symbol_cost;
3438         }
3439
3440       return address_cost;
3441     }
3442
3443   switch (TREE_CODE (expr))
3444     {
3445     case PLUS_EXPR:
3446     case MINUS_EXPR:
3447     case MULT_EXPR:
3448       op0 = TREE_OPERAND (expr, 0);
3449       op1 = TREE_OPERAND (expr, 1);
3450       STRIP_NOPS (op0);
3451       STRIP_NOPS (op1);
3452
3453       if (is_gimple_val (op0))
3454         cost0 = 0;
3455       else
3456         cost0 = force_var_cost (data, op0, NULL);
3457
3458       if (is_gimple_val (op1))
3459         cost1 = 0;
3460       else
3461         cost1 = force_var_cost (data, op1, NULL);
3462
3463       break;
3464
3465     default:
3466       /* Just an arbitrary value, FIXME.  */
3467       return target_spill_cost;
3468     }
3469
3470   mode = TYPE_MODE (TREE_TYPE (expr));
3471   switch (TREE_CODE (expr))
3472     {
3473     case PLUS_EXPR:
3474     case MINUS_EXPR:
3475       cost = add_cost (mode);
3476       break;
3477
3478     case MULT_EXPR:
3479       if (cst_and_fits_in_hwi (op0))
3480         cost = multiply_by_cost (int_cst_value (op0), mode);
3481       else if (cst_and_fits_in_hwi (op1))
3482         cost = multiply_by_cost (int_cst_value (op1), mode);
3483       else
3484         return target_spill_cost;
3485       break;
3486
3487     default:
3488       gcc_unreachable ();
3489     }
3490
3491   cost += cost0;
3492   cost += cost1;
3493
3494   /* Bound the cost by target_spill_cost.  The parts of complicated
3495      computations often are either loop invariant or at least can
3496      be shared between several iv uses, so letting this grow without
3497      limits would not give reasonable results.  */
3498   return cost < target_spill_cost ? cost : target_spill_cost;
3499 }
3500
3501 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3502    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3503    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3504    invariants the computation depends on.  */
3505
3506 static unsigned
3507 split_address_cost (struct ivopts_data *data,
3508                     tree addr, bool *symbol_present, bool *var_present,
3509                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3510 {
3511   tree core;
3512   HOST_WIDE_INT bitsize;
3513   HOST_WIDE_INT bitpos;
3514   tree toffset;
3515   enum machine_mode mode;
3516   int unsignedp, volatilep;
3517
3518   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3519                               &unsignedp, &volatilep, false);
3520
3521   if (toffset != 0
3522       || bitpos % BITS_PER_UNIT != 0
3523       || TREE_CODE (core) != VAR_DECL)
3524     {
3525       *symbol_present = false;
3526       *var_present = true;
3527       fd_ivopts_data = data;
3528       walk_tree (&addr, find_depends, depends_on, NULL);
3529       return target_spill_cost;
3530     }
3531
3532   *offset += bitpos / BITS_PER_UNIT;
3533   if (TREE_STATIC (core)
3534       || DECL_EXTERNAL (core))
3535     {
3536       *symbol_present = true;
3537       *var_present = false;
3538       return 0;
3539     }
3540
3541   *symbol_present = false;
3542   *var_present = true;
3543   return 0;
3544 }
3545
3546 /* Estimates cost of expressing difference of addresses E1 - E2 as
3547    var + symbol + offset.  The value of offset is added to OFFSET,
3548    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3549    part is missing.  DEPENDS_ON is a set of the invariants the computation
3550    depends on.  */
3551
3552 static unsigned
3553 ptr_difference_cost (struct ivopts_data *data,
3554                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3555                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3556 {
3557   HOST_WIDE_INT diff = 0;
3558   unsigned cost;
3559
3560   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3561
3562   if (ptr_difference_const (e1, e2, &diff))
3563     {
3564       *offset += diff;
3565       *symbol_present = false;
3566       *var_present = false;
3567       return 0;
3568     }
3569
3570   if (e2 == integer_zero_node)
3571     return split_address_cost (data, TREE_OPERAND (e1, 0),
3572                                symbol_present, var_present, offset, depends_on);
3573
3574   *symbol_present = false;
3575   *var_present = true;
3576
3577   cost = force_var_cost (data, e1, depends_on);
3578   cost += force_var_cost (data, e2, depends_on);
3579   cost += add_cost (Pmode);
3580
3581   return cost;
3582 }
3583
3584 /* Estimates cost of expressing difference E1 - E2 as
3585    var + symbol + offset.  The value of offset is added to OFFSET,
3586    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3587    part is missing.  DEPENDS_ON is a set of the invariants the computation
3588    depends on.  */
3589
3590 static unsigned
3591 difference_cost (struct ivopts_data *data,
3592                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3593                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3594 {
3595   unsigned cost;
3596   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3597   unsigned HOST_WIDE_INT off1, off2;
3598
3599   e1 = strip_offset (e1, &off1);
3600   e2 = strip_offset (e2, &off2);
3601   *offset += off1 - off2;
3602
3603   STRIP_NOPS (e1);
3604   STRIP_NOPS (e2);
3605
3606   if (TREE_CODE (e1) == ADDR_EXPR)
3607     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3608                                 depends_on);
3609   *symbol_present = false;
3610
3611   if (operand_equal_p (e1, e2, 0))
3612     {
3613       *var_present = false;
3614       return 0;
3615     }
3616   *var_present = true;
3617   if (zero_p (e2))
3618     return force_var_cost (data, e1, depends_on);
3619
3620   if (zero_p (e1))
3621     {
3622       cost = force_var_cost (data, e2, depends_on);
3623       cost += multiply_by_cost (-1, mode);
3624
3625       return cost;
3626     }
3627
3628   cost = force_var_cost (data, e1, depends_on);
3629   cost += force_var_cost (data, e2, depends_on);
3630   cost += add_cost (mode);
3631
3632   return cost;
3633 }
3634
3635 /* Determines the cost of the computation by that USE is expressed
3636    from induction variable CAND.  If ADDRESS_P is true, we just need
3637    to create an address from it, otherwise we want to get it into
3638    register.  A set of invariants we depend on is stored in
3639    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3640
3641 static unsigned
3642 get_computation_cost_at (struct ivopts_data *data,
3643                          struct iv_use *use, struct iv_cand *cand,
3644                          bool address_p, bitmap *depends_on, tree at)
3645 {
3646   tree ubase = use->iv->base, ustep = use->iv->step;
3647   tree cbase, cstep;
3648   tree utype = TREE_TYPE (ubase), ctype;
3649   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3650   HOST_WIDE_INT ratio, aratio;
3651   bool var_present, symbol_present;
3652   unsigned cost = 0, n_sums;
3653
3654   *depends_on = NULL;
3655
3656   /* Only consider real candidates.  */
3657   if (!cand->iv)
3658     return INFTY;
3659
3660   cbase = cand->iv->base;
3661   cstep = cand->iv->step;
3662   ctype = TREE_TYPE (cbase);
3663
3664   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3665     {
3666       /* We do not have a precision to express the values of use.  */
3667       return INFTY;
3668     }
3669
3670   if (address_p)
3671     {
3672       /* Do not try to express address of an object with computation based
3673          on address of a different object.  This may cause problems in rtl
3674          level alias analysis (that does not expect this to be happening,
3675          as this is illegal in C), and would be unlikely to be useful
3676          anyway.  */
3677       if (use->iv->base_object
3678           && cand->iv->base_object
3679           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3680         return INFTY;
3681     }
3682
3683   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3684     {
3685       /* TODO -- add direct handling of this case.  */
3686       goto fallback;
3687     }
3688
3689   /* CSTEPI is removed from the offset in case statement is after the
3690      increment.  If the step is not constant, we use zero instead.
3691      This is a bit imprecise (there is the extra addition), but
3692      redundancy elimination is likely to transform the code so that
3693      it uses value of the variable before increment anyway,
3694      so it is not that much unrealistic.  */
3695   if (cst_and_fits_in_hwi (cstep))
3696     cstepi = int_cst_value (cstep);
3697   else
3698     cstepi = 0;
3699
3700   if (cst_and_fits_in_hwi (ustep)
3701       && cst_and_fits_in_hwi (cstep))
3702     {
3703       ustepi = int_cst_value (ustep);
3704
3705       if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3706         return INFTY;
3707     }
3708   else
3709     {
3710       tree rat;
3711
3712       rat = constant_multiple_of (utype, ustep, cstep);
3713
3714       if (!rat)
3715         return INFTY;
3716
3717       if (cst_and_fits_in_hwi (rat))
3718         ratio = int_cst_value (rat);
3719       else if (integer_onep (rat))
3720         ratio = 1;
3721       else if (integer_all_onesp (rat))
3722         ratio = -1;
3723       else
3724         return INFTY;
3725     }
3726
3727   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3728      or ratio == 1, it is better to handle this like
3729
3730      ubase - ratio * cbase + ratio * var
3731
3732      (also holds in the case ratio == -1, TODO.  */
3733
3734   if (cst_and_fits_in_hwi (cbase))
3735     {
3736       offset = - ratio * int_cst_value (cbase);
3737       cost += difference_cost (data,
3738                                ubase, integer_zero_node,
3739                                &symbol_present, &var_present, &offset,
3740                                depends_on);
3741     }
3742   else if (ratio == 1)
3743     {
3744       cost += difference_cost (data,
3745                                ubase, cbase,
3746                                &symbol_present, &var_present, &offset,
3747                                depends_on);
3748     }
3749   else
3750     {
3751       cost += force_var_cost (data, cbase, depends_on);
3752       cost += add_cost (TYPE_MODE (ctype));
3753       cost += difference_cost (data,
3754                                ubase, integer_zero_node,
3755                                &symbol_present, &var_present, &offset,
3756                                depends_on);
3757     }
3758
3759   /* If we are after the increment, the value of the candidate is higher by
3760      one iteration.  */
3761   if (stmt_after_increment (data->current_loop, cand, at))
3762     offset -= ratio * cstepi;
3763
3764   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3765      (symbol/var/const parts may be omitted).  If we are looking for an address,
3766      find the cost of addressing this.  */
3767   if (address_p)
3768     return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3769
3770   /* Otherwise estimate the costs for computing the expression.  */
3771   aratio = ratio > 0 ? ratio : -ratio;
3772   if (!symbol_present && !var_present && !offset)
3773     {
3774       if (ratio != 1)
3775         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3776
3777       return cost;
3778     }
3779
3780   if (aratio != 1)
3781     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3782
3783   n_sums = 1;
3784   if (var_present
3785       /* Symbol + offset should be compile-time computable.  */
3786       && (symbol_present || offset))
3787     n_sums++;
3788
3789   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3790
3791 fallback:
3792   {
3793     /* Just get the expression, expand it and measure the cost.  */
3794     tree comp = get_computation_at (data->current_loop, use, cand, at);
3795
3796     if (!comp)
3797       return INFTY;
3798
3799     if (address_p)
3800       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3801
3802     return computation_cost (comp);
3803   }
3804 }
3805
3806 /* Determines the cost of the computation by that USE is expressed
3807    from induction variable CAND.  If ADDRESS_P is true, we just need
3808    to create an address from it, otherwise we want to get it into
3809    register.  A set of invariants we depend on is stored in
3810    DEPENDS_ON.  */
3811
3812 static unsigned
3813 get_computation_cost (struct ivopts_data *data,
3814                       struct iv_use *use, struct iv_cand *cand,
3815                       bool address_p, bitmap *depends_on)
3816 {
3817   return get_computation_cost_at (data,
3818                                   use, cand, address_p, depends_on, use->stmt);
3819 }
3820
3821 /* Determines cost of basing replacement of USE on CAND in a generic
3822    expression.  */
3823
3824 static bool
3825 determine_use_iv_cost_generic (struct ivopts_data *data,
3826                                struct iv_use *use, struct iv_cand *cand)
3827 {
3828   bitmap depends_on;
3829   unsigned cost;
3830
3831   /* The simple case first -- if we need to express value of the preserved
3832      original biv, the cost is 0.  This also prevents us from counting the
3833      cost of increment twice -- once at this use and once in the cost of
3834      the candidate.  */
3835   if (cand->pos == IP_ORIGINAL
3836       && cand->incremented_at == use->stmt)
3837     {
3838       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3839       return true;
3840     }
3841
3842   cost = get_computation_cost (data, use, cand, false, &depends_on);
3843   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3844
3845   return cost != INFTY;
3846 }
3847
3848 /* Determines cost of basing replacement of USE on CAND in an address.  */
3849
3850 static bool
3851 determine_use_iv_cost_address (struct ivopts_data *data,
3852                                struct iv_use *use, struct iv_cand *cand)
3853 {
3854   bitmap depends_on;
3855   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3856
3857   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3858
3859   return cost != INFTY;
3860 }
3861
3862 /* Computes value of induction variable IV in iteration NITER.  */
3863
3864 static tree
3865 iv_value (struct iv *iv, tree niter)
3866 {
3867   tree val;
3868   tree type = TREE_TYPE (iv->base);
3869
3870   niter = fold_convert (type, niter);
3871   val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3872
3873   return fold (build2 (PLUS_EXPR, type, iv->base, val));
3874 }
3875
3876 /* Computes value of candidate CAND at position AT in iteration NITER.  */
3877
3878 static tree
3879 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3880 {
3881   tree val = iv_value (cand->iv, niter);
3882   tree type = TREE_TYPE (cand->iv->base);
3883
3884   if (stmt_after_increment (loop, cand, at))
3885     val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3886
3887   return val;
3888 }
3889
3890 /* Returns period of induction variable iv.  */
3891
3892 static tree
3893 iv_period (struct iv *iv)
3894 {
3895   tree step = iv->step, period, type;
3896   tree pow2div;
3897
3898   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3899
3900   /* Period of the iv is gcd (step, type range).  Since type range is power
3901      of two, it suffices to determine the maximum power of two that divides
3902      step.  */
3903   pow2div = num_ending_zeros (step);
3904   type = unsigned_type_for (TREE_TYPE (step));
3905
3906   period = build_low_bits_mask (type,
3907                                 (TYPE_PRECISION (type)
3908                                  - tree_low_cst (pow2div, 1)));
3909
3910   return period;
3911 }
3912
3913 /* Returns the comparison operator used when eliminating the iv USE.  */
3914
3915 static enum tree_code
3916 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3917 {
3918   struct loop *loop = data->current_loop;
3919   basic_block ex_bb;
3920   edge exit;
3921
3922   ex_bb = bb_for_stmt (use->stmt);
3923   exit = EDGE_SUCC (ex_bb, 0);
3924   if (flow_bb_inside_loop_p (loop, exit->dest))
3925     exit = EDGE_SUCC (ex_bb, 1);
3926
3927   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3928 }
3929
3930 /* Check whether it is possible to express the condition in USE by comparison
3931    of candidate CAND.  If so, store the value compared with to BOUND.  */
3932
3933 static bool
3934 may_eliminate_iv (struct ivopts_data *data,
3935                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3936 {
3937   basic_block ex_bb;
3938   edge exit;
3939   struct tree_niter_desc *niter;
3940   tree nit, nit_type;
3941   tree wider_type, period, per_type;
3942   struct loop *loop = data->current_loop;
3943
3944   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3945     return false;
3946
3947   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3948      for other conditions inside loop body.  */
3949   ex_bb = bb_for_stmt (use->stmt);
3950   if (use->stmt != last_stmt (ex_bb)
3951       || TREE_CODE (use->stmt) != COND_EXPR)
3952     return false;
3953   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3954     return false;
3955
3956   exit = EDGE_SUCC (ex_bb, 0);
3957   if (flow_bb_inside_loop_p (loop, exit->dest))
3958     exit = EDGE_SUCC (ex_bb, 1);
3959   if (flow_bb_inside_loop_p (loop, exit->dest))
3960     return false;
3961
3962   niter = niter_for_exit (data, exit);
3963   if (!niter
3964       || !zero_p (niter->may_be_zero))
3965     return false;
3966
3967   nit = niter->niter;
3968   nit_type = TREE_TYPE (nit);
3969
3970   /* Determine whether we may use the variable to test whether niter iterations
3971      elapsed.  This is the case iff the period of the induction variable is
3972      greater than the number of iterations.  */
3973   period = iv_period (cand->iv);
3974   if (!period)
3975     return false;
3976   per_type = TREE_TYPE (period);
3977
3978   wider_type = TREE_TYPE (period);
3979   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3980     wider_type = per_type;
3981   else
3982     wider_type = nit_type;
3983
3984   if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3985                                        fold_convert (wider_type, period),
3986                                        fold_convert (wider_type, nit)))))
3987     return false;
3988
3989   *bound = cand_value_at (loop, cand, use->stmt, nit);
3990   return true;
3991 }
3992
3993 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3994
3995 static bool
3996 determine_use_iv_cost_condition (struct ivopts_data *data,
3997                                  struct iv_use *use, struct iv_cand *cand)
3998 {
3999   tree bound = NULL_TREE, op, cond;
4000   bitmap depends_on = NULL;
4001   unsigned cost;
4002
4003   /* Only consider real candidates.  */
4004   if (!cand->iv)
4005     {
4006       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4007       return false;
4008     }
4009
4010   if (may_eliminate_iv (data, use, cand, &bound))
4011     {
4012       cost = force_var_cost (data, bound, &depends_on);
4013
4014       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4015       return cost != INFTY;
4016     }
4017
4018   /* The induction variable elimination failed; just express the original
4019      giv.  If it is compared with an invariant, note that we cannot get
4020      rid of it.  */
4021   cost = get_computation_cost (data, use, cand, false, &depends_on);
4022
4023   cond = *use->op_p;
4024   if (TREE_CODE (cond) != SSA_NAME)
4025     {
4026       op = TREE_OPERAND (cond, 0);
4027       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4028         op = TREE_OPERAND (cond, 1);
4029       if (TREE_CODE (op) == SSA_NAME)
4030         {
4031           op = get_iv (data, op)->base;
4032           fd_ivopts_data = data;
4033           walk_tree (&op, find_depends, &depends_on, NULL);
4034         }
4035     }
4036
4037   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4038   return cost != INFTY;
4039 }
4040
4041 /* Checks whether it is possible to replace the final value of USE by
4042    a direct computation.  If so, the formula is stored to *VALUE.  */
4043
4044 static bool
4045 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4046                          tree *value)
4047 {
4048   struct loop *loop = data->current_loop;
4049   edge exit;
4050   struct tree_niter_desc *niter;
4051
4052   exit = single_dom_exit (loop);
4053   if (!exit)
4054     return false;
4055
4056   gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4057                               bb_for_stmt (use->stmt)));
4058
4059   niter = niter_for_single_dom_exit (data);
4060   if (!niter
4061       || !zero_p (niter->may_be_zero))
4062     return false;
4063
4064   *value = iv_value (use->iv, niter->niter);
4065
4066   return true;
4067 }
4068
4069 /* Determines cost of replacing final value of USE using CAND.  */
4070
4071 static bool
4072 determine_use_iv_cost_outer (struct ivopts_data *data,
4073                              struct iv_use *use, struct iv_cand *cand)
4074 {
4075   bitmap depends_on;
4076   unsigned cost;
4077   edge exit;
4078   tree value = NULL_TREE;
4079   struct loop *loop = data->current_loop;
4080
4081   /* The simple case first -- if we need to express value of the preserved
4082      original biv, the cost is 0.  This also prevents us from counting the
4083      cost of increment twice -- once at this use and once in the cost of
4084      the candidate.  */
4085   if (cand->pos == IP_ORIGINAL
4086       && cand->incremented_at == use->stmt)
4087     {
4088       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4089       return true;
4090     }
4091
4092   if (!cand->iv)
4093     {
4094       if (!may_replace_final_value (data, use, &value))
4095         {
4096           set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4097           return false;
4098         }
4099
4100       depends_on = NULL;
4101       cost = force_var_cost (data, value, &depends_on);
4102
4103       cost /= AVG_LOOP_NITER (loop);
4104
4105       set_use_iv_cost (data, use, cand, cost, depends_on, value);
4106       return cost != INFTY;
4107     }
4108
4109   exit = single_dom_exit (loop);
4110   if (exit)
4111     {
4112       /* If there is just a single exit, we may use value of the candidate
4113          after we take it to determine the value of use.  */
4114       cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4115                                       last_stmt (exit->src));
4116       if (cost != INFTY)
4117         cost /= AVG_LOOP_NITER (loop);
4118     }
4119   else
4120     {
4121       /* Otherwise we just need to compute the iv.  */
4122       cost = get_computation_cost (data, use, cand, false, &depends_on);
4123     }
4124
4125   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4126
4127   return cost != INFTY;
4128 }
4129
4130 /* Determines cost of basing replacement of USE on CAND.  Returns false
4131    if USE cannot be based on CAND.  */
4132
4133 static bool
4134 determine_use_iv_cost (struct ivopts_data *data,
4135                        struct iv_use *use, struct iv_cand *cand)
4136 {
4137   switch (use->type)
4138     {
4139     case USE_NONLINEAR_EXPR:
4140       return determine_use_iv_cost_generic (data, use, cand);
4141
4142     case USE_OUTER:
4143       return determine_use_iv_cost_outer (data, use, cand);
4144
4145     case USE_ADDRESS:
4146       return determine_use_iv_cost_address (data, use, cand);
4147
4148     case USE_COMPARE:
4149       return determine_use_iv_cost_condition (data, use, cand);
4150
4151     default:
4152       gcc_unreachable ();
4153     }
4154 }
4155
4156 /* Determines costs of basing the use of the iv on an iv candidate.  */
4157
4158 static void
4159 determine_use_iv_costs (struct ivopts_data *data)
4160 {
4161   unsigned i, j;
4162   struct iv_use *use;
4163   struct iv_cand *cand;
4164   bitmap to_clear = BITMAP_ALLOC (NULL);
4165
4166   alloc_use_cost_map (data);
4167
4168   for (i = 0; i < n_iv_uses (data); i++)
4169     {
4170       use = iv_use (data, i);
4171
4172       if (data->consider_all_candidates)
4173         {
4174           for (j = 0; j < n_iv_cands (data); j++)
4175             {
4176               cand = iv_cand (data, j);
4177               determine_use_iv_cost (data, use, cand);
4178             }
4179         }
4180       else
4181         {
4182           bitmap_iterator bi;
4183
4184           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4185             {
4186               cand = iv_cand (data, j);
4187               if (!determine_use_iv_cost (data, use, cand))
4188                 bitmap_set_bit (to_clear, j);
4189             }
4190
4191           /* Remove the candidates for that the cost is infinite from
4192              the list of related candidates.  */
4193           bitmap_and_compl_into (use->related_cands, to_clear);
4194           bitmap_clear (to_clear);
4195         }
4196     }
4197
4198   BITMAP_FREE (to_clear);
4199
4200   if (dump_file && (dump_flags & TDF_DETAILS))
4201     {
4202       fprintf (dump_file, "Use-candidate costs:\n");
4203
4204       for (i = 0; i < n_iv_uses (data); i++)
4205         {
4206           use = iv_use (data, i);
4207
4208           fprintf (dump_file, "Use %d:\n", i);
4209           fprintf (dump_file, "  cand\tcost\tdepends on\n");
4210           for (j = 0; j < use->n_map_members; j++)
4211             {
4212               if (!use->cost_map[j].cand
4213                   || use->cost_map[j].cost == INFTY)
4214                 continue;
4215
4216               fprintf (dump_file, "  %d\t%d\t",
4217                        use->cost_map[j].cand->id,
4218                        use->cost_map[j].cost);
4219               if (use->cost_map[j].depends_on)
4220                 bitmap_print (dump_file,
4221                               use->cost_map[j].depends_on, "","");
4222               fprintf (dump_file, "\n");
4223             }
4224
4225           fprintf (dump_file, "\n");
4226         }
4227       fprintf (dump_file, "\n");
4228     }
4229 }
4230
4231 /* Determines cost of the candidate CAND.  */
4232
4233 static void
4234 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4235 {
4236   unsigned cost_base, cost_step;
4237   tree base;
4238
4239   if (!cand->iv)
4240     {
4241       cand->cost = 0;
4242       return;
4243     }
4244
4245   /* There are two costs associated with the candidate -- its increment
4246      and its initialization.  The second is almost negligible for any loop
4247      that rolls enough, so we take it just very little into account.  */
4248
4249   base = cand->iv->base;
4250   cost_base = force_var_cost (data, base, NULL);
4251   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4252
4253   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4254
4255   /* Prefer the original iv unless we may gain something by replacing it;
4256      this is not really relevant for artificial ivs created by other
4257      passes.  */
4258   if (cand->pos == IP_ORIGINAL
4259       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4260     cand->cost--;
4261
4262   /* Prefer not to insert statements into latch unless there are some
4263      already (so that we do not create unnecessary jumps).  */
4264   if (cand->pos == IP_END
4265       && empty_block_p (ip_end_pos (data->current_loop)))
4266     cand->cost++;
4267 }
4268
4269 /* Determines costs of computation of the candidates.  */
4270
4271 static void
4272 determine_iv_costs (struct ivopts_data *data)
4273 {
4274   unsigned i;
4275
4276   if (dump_file && (dump_flags & TDF_DETAILS))
4277     {
4278       fprintf (dump_file, "Candidate costs:\n");
4279       fprintf (dump_file, "  cand\tcost\n");
4280     }
4281
4282   for (i = 0; i < n_iv_cands (data); i++)
4283     {
4284       struct iv_cand *cand = iv_cand (data, i);
4285
4286       determine_iv_cost (data, cand);
4287
4288       if (dump_file && (dump_flags & TDF_DETAILS))
4289         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4290     }
4291
4292 if (dump_file && (dump_flags & TDF_DETAILS))
4293       fprintf (dump_file, "\n");
4294 }
4295
4296 /* Calculates cost for having SIZE induction variables.  */
4297
4298 static unsigned
4299 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4300 {
4301   return global_cost_for_size (size,
4302                                loop_data (data->current_loop)->regs_used,
4303                                n_iv_uses (data));
4304 }
4305
4306 /* For each size of the induction variable set determine the penalty.  */
4307
4308 static void
4309 determine_set_costs (struct ivopts_data *data)
4310 {
4311   unsigned j, n;
4312   tree phi, op;
4313   struct loop *loop = data->current_loop;
4314   bitmap_iterator bi;
4315
4316   /* We use the following model (definitely improvable, especially the
4317      cost function -- TODO):
4318
4319      We estimate the number of registers available (using MD data), name it A.
4320
4321      We estimate the number of registers used by the loop, name it U.  This
4322      number is obtained as the number of loop phi nodes (not counting virtual
4323      registers and bivs) + the number of variables from outside of the loop.
4324
4325      We set a reserve R (free regs that are used for temporary computations,
4326      etc.).  For now the reserve is a constant 3.
4327
4328      Let I be the number of induction variables.
4329
4330      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4331         make a lot of ivs without a reason).
4332      -- if A - R < U + I <= A, the cost is I * PRES_COST
4333      -- if U + I > A, the cost is I * PRES_COST and
4334         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4335
4336   if (dump_file && (dump_flags & TDF_DETAILS))
4337     {
4338       fprintf (dump_file, "Global costs:\n");
4339       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4340       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4341       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4342       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4343     }
4344
4345   n = 0;
4346   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4347     {
4348       op = PHI_RESULT (phi);
4349
4350       if (!is_gimple_reg (op))
4351         continue;
4352
4353       if (get_iv (data, op))
4354         continue;
4355
4356       n++;
4357     }
4358
4359   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4360     {
4361       struct version_info *info = ver_info (data, j);
4362
4363       if (info->inv_id && info->has_nonlin_use)
4364         n++;
4365     }
4366
4367   loop_data (loop)->regs_used = n;
4368   if (dump_file && (dump_flags & TDF_DETAILS))
4369     fprintf (dump_file, "  regs_used %d\n", n);
4370
4371   if (dump_file && (dump_flags & TDF_DETAILS))
4372     {
4373       fprintf (dump_file, "  cost for size:\n");
4374       fprintf (dump_file, "  ivs\tcost\n");
4375       for (j = 0; j <= 2 * target_avail_regs; j++)
4376         fprintf (dump_file, "  %d\t%d\n", j,
4377                  ivopts_global_cost_for_size (data, j));
4378       fprintf (dump_file, "\n");
4379     }
4380 }
4381
4382 /* Returns true if A is a cheaper cost pair than B.  */
4383
4384 static bool
4385 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4386 {
4387   if (!a)
4388     return false;
4389
4390   if (!b)
4391     return true;
4392
4393   if (a->cost < b->cost)
4394     return true;
4395
4396   if (a->cost > b->cost)
4397     return false;
4398
4399   /* In case the costs are the same, prefer the cheaper candidate.  */
4400   if (a->cand->cost < b->cand->cost)
4401     return true;
4402
4403   return false;
4404 }
4405
4406 /* Computes the cost field of IVS structure.  */
4407
4408 static void
4409 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4410 {
4411   unsigned cost = 0;
4412
4413   cost += ivs->cand_use_cost;
4414   cost += ivs->cand_cost;
4415   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4416
4417   ivs->cost = cost;
4418 }
4419
4420 /* Remove invariants in set INVS to set IVS.  */
4421
4422 static void
4423 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4424 {
4425   bitmap_iterator bi;
4426   unsigned iid;
4427
4428   if (!invs)
4429     return;
4430
4431   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4432     {
4433       ivs->n_invariant_uses[iid]--;
4434       if (ivs->n_invariant_uses[iid] == 0)
4435         ivs->n_regs--;
4436     }
4437 }
4438
4439 /* Set USE not to be expressed by any candidate in IVS.  */
4440
4441 static void
4442 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4443                  struct iv_use *use)
4444 {
4445   unsigned uid = use->id, cid;
4446   struct cost_pair *cp;
4447
4448   cp = ivs->cand_for_use[uid];
4449   if (!cp)
4450     return;
4451   cid = cp->cand->id;
4452
4453   ivs->bad_uses++;
4454   ivs->cand_for_use[uid] = NULL;
4455   ivs->n_cand_uses[cid]--;
4456
4457   if (ivs->n_cand_uses[cid] == 0)
4458     {
4459       bitmap_clear_bit (ivs->cands, cid);
4460       /* Do not count the pseudocandidates.  */
4461       if (cp->cand->iv)
4462         ivs->n_regs--;
4463       ivs->n_cands--;
4464       ivs->cand_cost -= cp->cand->cost;
4465
4466       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4467     }
4468
4469   ivs->cand_use_cost -= cp->cost;
4470
4471   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4472   iv_ca_recount_cost (data, ivs);
4473 }
4474
4475 /* Add invariants in set INVS to set IVS.  */
4476
4477 static void
4478 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4479 {
4480   bitmap_iterator bi;
4481   unsigned iid;
4482
4483   if (!invs)
4484     return;
4485
4486   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4487     {
4488       ivs->n_invariant_uses[iid]++;
4489       if (ivs->n_invariant_uses[iid] == 1)
4490         ivs->n_regs++;
4491     }
4492 }
4493
4494 /* Set cost pair for USE in set IVS to CP.  */
4495
4496 static void
4497 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4498               struct iv_use *use, struct cost_pair *cp)
4499 {
4500   unsigned uid = use->id, cid;
4501
4502   if (ivs->cand_for_use[uid] == cp)
4503     return;
4504
4505   if (ivs->cand_for_use[uid])
4506     iv_ca_set_no_cp (data, ivs, use);
4507
4508   if (cp)
4509     {
4510       cid = cp->cand->id;
4511
4512       ivs->bad_uses--;
4513       ivs->cand_for_use[uid] = cp;
4514       ivs->n_cand_uses[cid]++;
4515       if (ivs->n_cand_uses[cid] == 1)
4516         {
4517           bitmap_set_bit (ivs->cands, cid);
4518           /* Do not count the pseudocandidates.  */
4519           if (cp->cand->iv)
4520             ivs->n_regs++;
4521           ivs->n_cands++;
4522           ivs->cand_cost += cp->cand->cost;
4523
4524           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4525         }
4526
4527       ivs->cand_use_cost += cp->cost;
4528       iv_ca_set_add_invariants (ivs, cp->depends_on);
4529       iv_ca_recount_cost (data, ivs);
4530     }
4531 }
4532
4533 /* Extend set IVS by expressing USE by some of the candidates in it
4534    if possible.  */
4535
4536 static void
4537 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4538                struct iv_use *use)
4539 {
4540   struct cost_pair *best_cp = NULL, *cp;
4541   bitmap_iterator bi;
4542   unsigned i;
4543
4544   gcc_assert (ivs->upto >= use->id);
4545
4546   if (ivs->upto == use->id)
4547     {
4548       ivs->upto++;
4549       ivs->bad_uses++;
4550     }
4551
4552   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4553     {
4554       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4555
4556       if (cheaper_cost_pair (cp, best_cp))
4557         best_cp = cp;
4558     }
4559
4560   iv_ca_set_cp (data, ivs, use, best_cp);
4561 }
4562
4563 /* Get cost for assignment IVS.  */
4564
4565 static unsigned
4566 iv_ca_cost (struct iv_ca *ivs)
4567 {
4568   return (ivs->bad_uses ? INFTY : ivs->cost);
4569 }
4570
4571 /* Returns true if all dependences of CP are among invariants in IVS.  */
4572
4573 static bool
4574 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4575 {
4576   unsigned i;
4577   bitmap_iterator bi;
4578
4579   if (!cp->depends_on)
4580     return true;
4581
4582   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4583     {
4584       if (ivs->n_invariant_uses[i] == 0)
4585         return false;
4586     }
4587
4588   return true;
4589 }
4590
4591 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4592    it before NEXT_CHANGE.  */
4593
4594 static struct iv_ca_delta *
4595 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4596                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4597 {
4598   struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4599
4600   change->use = use;
4601   change->old_cp = old_cp;
4602   change->new_cp = new_cp;
4603   change->next_change = next_change;
4604
4605   return change;
4606 }
4607
4608 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4609    are rewritten.  */
4610
4611 static struct iv_ca_delta *
4612 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4613 {
4614   struct iv_ca_delta *last;
4615
4616   if (!l2)
4617     return l1;
4618
4619   if (!l1)
4620     return l2;
4621
4622   for (last = l1; last->next_change; last = last->next_change)
4623     continue;
4624   last->next_change = l2;
4625
4626   return l1;
4627 }
4628
4629 /* Returns candidate by that USE is expressed in IVS.  */
4630
4631 static struct cost_pair *
4632 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4633 {
4634   return ivs->cand_for_use[use->id];
4635 }
4636
4637 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4638
4639 static struct iv_ca_delta *
4640 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4641 {
4642   struct iv_ca_delta *act, *next, *prev = NULL;
4643   struct cost_pair *tmp;
4644
4645   for (act = delta; act; act = next)
4646     {
4647       next = act->next_change;
4648       act->next_change = prev;
4649       prev = act;
4650
4651       tmp = act->old_cp;
4652       act->old_cp = act->new_cp;
4653       act->new_cp = tmp;
4654     }
4655
4656   return prev;
4657 }
4658
4659 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4660    reverted instead.  */
4661
4662 static void
4663 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4664                     struct iv_ca_delta *delta, bool forward)
4665 {
4666   struct cost_pair *from, *to;
4667   struct iv_ca_delta *act;
4668
4669   if (!forward)
4670     delta = iv_ca_delta_reverse (delta);
4671
4672   for (act = delta; act; act = act->next_change)
4673     {
4674       from = act->old_cp;
4675       to = act->new_cp;
4676       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4677       iv_ca_set_cp (data, ivs, act->use, to);
4678     }
4679
4680   if (!forward)
4681     iv_ca_delta_reverse (delta);
4682 }
4683
4684 /* Returns true if CAND is used in IVS.  */
4685
4686 static bool
4687 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4688 {
4689   return ivs->n_cand_uses[cand->id] > 0;
4690 }
4691
4692 /* Returns number of induction variable candidates in the set IVS.  */
4693
4694 static unsigned
4695 iv_ca_n_cands (struct iv_ca *ivs)
4696 {
4697   return ivs->n_cands;
4698 }
4699
4700 /* Free the list of changes DELTA.  */
4701
4702 static void
4703 iv_ca_delta_free (struct iv_ca_delta **delta)
4704 {
4705   struct iv_ca_delta *act, *next;
4706
4707   for (act = *delta; act; act = next)
4708     {
4709       next = act->next_change;
4710       free (act);
4711     }
4712
4713   *delta = NULL;
4714 }
4715
4716 /* Allocates new iv candidates assignment.  */
4717
4718 static struct iv_ca *
4719 iv_ca_new (struct ivopts_data *data)
4720 {
4721   struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4722
4723   nw->upto = 0;
4724   nw->bad_uses = 0;
4725   nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4726   nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4727   nw->cands = BITMAP_ALLOC (NULL);
4728   nw->n_cands = 0;
4729   nw->n_regs = 0;
4730   nw->cand_use_cost = 0;
4731   nw->cand_cost = 0;
4732   nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4733   nw->cost = 0;
4734
4735   return nw;
4736 }
4737
4738 /* Free memory occupied by the set IVS.  */
4739
4740 static void
4741 iv_ca_free (struct iv_ca **ivs)
4742 {
4743   free ((*ivs)->cand_for_use);
4744   free ((*ivs)->n_cand_uses);
4745   BITMAP_FREE ((*ivs)->cands);
4746   free ((*ivs)->n_invariant_uses);
4747   free (*ivs);
4748   *ivs = NULL;
4749 }
4750
4751 /* Dumps IVS to FILE.  */
4752
4753 static void
4754 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4755 {
4756   const char *pref = "  invariants ";
4757   unsigned i;
4758
4759   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4760   bitmap_print (file, ivs->cands, "  candidates ","\n");
4761
4762   for (i = 1; i <= data->max_inv_id; i++)
4763     if (ivs->n_invariant_uses[i])
4764       {
4765         fprintf (file, "%s%d", pref, i);
4766         pref = ", ";
4767       }
4768   fprintf (file, "\n");
4769 }
4770
4771 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4772    new set, and store differences in DELTA.  Number of induction variables
4773    in the new set is stored to N_IVS.  */
4774
4775 static unsigned
4776 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4777               struct iv_cand *cand, struct iv_ca_delta **delta,
4778               unsigned *n_ivs)
4779 {
4780   unsigned i, cost;
4781   struct iv_use *use;
4782   struct cost_pair *old_cp, *new_cp;
4783
4784   *delta = NULL;
4785   for (i = 0; i < ivs->upto; i++)
4786     {
4787       use = iv_use (data, i);
4788       old_cp = iv_ca_cand_for_use (ivs, use);
4789
4790       if (old_cp
4791           && old_cp->cand == cand)
4792         continue;
4793
4794       new_cp = get_use_iv_cost (data, use, cand);
4795       if (!new_cp)
4796         continue;
4797
4798       if (!iv_ca_has_deps (ivs, new_cp))
4799         continue;
4800
4801       if (!cheaper_cost_pair (new_cp, old_cp))
4802         continue;
4803
4804       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4805     }
4806
4807   iv_ca_delta_commit (data, ivs, *delta, true);
4808   cost = iv_ca_cost (ivs);
4809   if (n_ivs)
4810     *n_ivs = iv_ca_n_cands (ivs);
4811   iv_ca_delta_commit (data, ivs, *delta, false);
4812
4813   return cost;
4814 }
4815
4816 /* Try narrowing set IVS by removing CAND.  Return the cost of
4817    the new set and store the differences in DELTA.  */
4818
4819 static unsigned
4820 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4821               struct iv_cand *cand, struct iv_ca_delta **delta)
4822 {
4823   unsigned i, ci;
4824   struct iv_use *use;
4825   struct cost_pair *old_cp, *new_cp, *cp;
4826   bitmap_iterator bi;
4827   struct iv_cand *cnd;
4828   unsigned cost;
4829
4830   *delta = NULL;
4831   for (i = 0; i < n_iv_uses (data); i++)
4832     {
4833       use = iv_use (data, i);
4834
4835       old_cp = iv_ca_cand_for_use (ivs, use);
4836       if (old_cp->cand != cand)
4837         continue;
4838
4839       new_cp = NULL;
4840
4841       if (data->consider_all_candidates)
4842         {
4843           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4844             {
4845               if (ci == cand->id)
4846                 continue;
4847
4848               cnd = iv_cand (data, ci);
4849
4850               cp = get_use_iv_cost (data, use, cnd);
4851               if (!cp)
4852                 continue;
4853               if (!iv_ca_has_deps (ivs, cp))
4854                 continue;
4855
4856               if (!cheaper_cost_pair (cp, new_cp))
4857                 continue;
4858
4859               new_cp = cp;
4860             }
4861         }
4862       else
4863         {
4864           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4865             {
4866               if (ci == cand->id)
4867                 continue;
4868
4869               cnd = iv_cand (data, ci);
4870
4871               cp = get_use_iv_cost (data, use, cnd);
4872               if (!cp)
4873                 continue;
4874               if (!iv_ca_has_deps (ivs, cp))
4875                 continue;
4876
4877               if (!cheaper_cost_pair (cp, new_cp))
4878                 continue;
4879
4880               new_cp = cp;
4881             }
4882         }
4883
4884       if (!new_cp)
4885         {
4886           iv_ca_delta_free (delta);
4887           return INFTY;
4888         }
4889
4890       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4891     }
4892
4893   iv_ca_delta_commit (data, ivs, *delta, true);
4894   cost = iv_ca_cost (ivs);
4895   iv_ca_delta_commit (data, ivs, *delta, false);
4896
4897   return cost;
4898 }
4899
4900 /* Try optimizing the set of candidates IVS by removing candidates different
4901    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4902    differences in DELTA.  */
4903
4904 static unsigned
4905 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4906              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4907 {
4908   bitmap_iterator bi;
4909   struct iv_ca_delta *act_delta, *best_delta;
4910   unsigned i, best_cost, acost;
4911   struct iv_cand *cand;
4912
4913   best_delta = NULL;
4914   best_cost = iv_ca_cost (ivs);
4915
4916   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4917     {
4918       cand = iv_cand (data, i);
4919
4920       if (cand == except_cand)
4921         continue;
4922
4923       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4924
4925       if (acost < best_cost)
4926         {
4927           best_cost = acost;
4928           iv_ca_delta_free (&best_delta);
4929           best_delta = act_delta;
4930         }
4931       else
4932         iv_ca_delta_free (&act_delta);
4933     }
4934
4935   if (!best_delta)
4936     {
4937       *delta = NULL;
4938       return best_cost;
4939     }
4940
4941   /* Recurse to possibly remove other unnecessary ivs.  */
4942   iv_ca_delta_commit (data, ivs, best_delta, true);
4943   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4944   iv_ca_delta_commit (data, ivs, best_delta, false);
4945   *delta = iv_ca_delta_join (best_delta, *delta);
4946   return best_cost;
4947 }
4948
4949 /* Tries to extend the sets IVS in the best possible way in order
4950    to express the USE.  */
4951
4952 static bool
4953 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4954                   struct iv_use *use)
4955 {
4956   unsigned best_cost, act_cost;
4957   unsigned i;
4958   bitmap_iterator bi;
4959   struct iv_cand *cand;
4960   struct iv_ca_delta *best_delta = NULL, *act_delta;
4961   struct cost_pair *cp;
4962
4963   iv_ca_add_use (data, ivs, use);
4964   best_cost = iv_ca_cost (ivs);
4965
4966   cp = iv_ca_cand_for_use (ivs, use);
4967   if (cp)
4968     {
4969       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4970       iv_ca_set_no_cp (data, ivs, use);
4971     }
4972
4973   /* First try important candidates.  Only if it fails, try the specific ones.
4974      Rationale -- in loops with many variables the best choice often is to use
4975      just one generic biv.  If we added here many ivs specific to the uses,
4976      the optimization algorithm later would be likely to get stuck in a local
4977      minimum, thus causing us to create too many ivs.  The approach from
4978      few ivs to more seems more likely to be successful -- starting from few
4979      ivs, replacing an expensive use by a specific iv should always be a
4980      win.  */
4981   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4982     {
4983       cand = iv_cand (data, i);
4984
4985       if (iv_ca_cand_used_p (ivs, cand))
4986         continue;
4987
4988       cp = get_use_iv_cost (data, use, cand);
4989       if (!cp)
4990         continue;
4991
4992       iv_ca_set_cp (data, ivs, use, cp);
4993       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4994       iv_ca_set_no_cp (data, ivs, use);
4995       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4996
4997       if (act_cost < best_cost)
4998         {
4999           best_cost = act_cost;
5000
5001           iv_ca_delta_free (&best_delta);
5002           best_delta = act_delta;
5003         }
5004       else
5005         iv_ca_delta_free (&act_delta);
5006     }
5007
5008   if (best_cost == INFTY)
5009     {
5010       for (i = 0; i < use->n_map_members; i++)
5011         {
5012           cp = use->cost_map + i;
5013           cand = cp->cand;
5014           if (!cand)
5015             continue;
5016
5017           /* Already tried this.  */
5018           if (cand->important)
5019             continue;
5020
5021           if (iv_ca_cand_used_p (ivs, cand))
5022             continue;
5023
5024           act_delta = NULL;
5025           iv_ca_set_cp (data, ivs, use, cp);
5026           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5027           iv_ca_set_no_cp (data, ivs, use);
5028           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5029                                        cp, act_delta);
5030
5031           if (act_cost < best_cost)
5032             {
5033               best_cost = act_cost;
5034
5035               if (best_delta)
5036                 iv_ca_delta_free (&best_delta);
5037               best_delta = act_delta;
5038             }
5039           else
5040             iv_ca_delta_free (&act_delta);
5041         }
5042     }
5043
5044   iv_ca_delta_commit (data, ivs, best_delta, true);
5045   iv_ca_delta_free (&best_delta);
5046
5047   return (best_cost != INFTY);
5048 }
5049
5050 /* Finds an initial assignment of candidates to uses.  */
5051
5052 static struct iv_ca *
5053 get_initial_solution (struct ivopts_data *data)
5054 {
5055   struct iv_ca *ivs = iv_ca_new (data);
5056   unsigned i;
5057
5058   for (i = 0; i < n_iv_uses (data); i++)
5059     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5060       {
5061         iv_ca_free (&ivs);
5062         return NULL;
5063       }
5064
5065   return ivs;
5066 }
5067
5068 /* Tries to improve set of induction variables IVS.  */
5069
5070 static bool
5071 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5072 {
5073   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5074   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5075   struct iv_cand *cand;
5076
5077   /* Try extending the set of induction variables by one.  */
5078   for (i = 0; i < n_iv_cands (data); i++)
5079     {
5080       cand = iv_cand (data, i);
5081
5082       if (iv_ca_cand_used_p (ivs, cand))
5083         continue;
5084
5085       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5086       if (!act_delta)
5087         continue;
5088
5089       /* If we successfully added the candidate and the set is small enough,
5090          try optimizing it by removing other candidates.  */
5091       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5092         {
5093           iv_ca_delta_commit (data, ivs, act_delta, true);
5094           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5095           iv_ca_delta_commit (data, ivs, act_delta, false);
5096           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5097         }
5098
5099       if (acost < best_cost)
5100         {
5101           best_cost = acost;
5102           iv_ca_delta_free (&best_delta);
5103           best_delta = act_delta;
5104         }
5105       else
5106         iv_ca_delta_free (&act_delta);
5107     }
5108
5109   if (!best_delta)
5110     {
5111       /* Try removing the candidates from the set instead.  */
5112       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5113
5114       /* Nothing more we can do.  */
5115       if (!best_delta)
5116         return false;
5117     }
5118
5119   iv_ca_delta_commit (data, ivs, best_delta, true);
5120   gcc_assert (best_cost == iv_ca_cost (ivs));
5121   iv_ca_delta_free (&best_delta);
5122   return true;
5123 }
5124
5125 /* Attempts to find the optimal set of induction variables.  We do simple
5126    greedy heuristic -- we try to replace at most one candidate in the selected
5127    solution and remove the unused ivs while this improves the cost.  */
5128
5129 static struct iv_ca *
5130 find_optimal_iv_set (struct ivopts_data *data)
5131 {
5132   unsigned i;
5133   struct iv_ca *set;
5134   struct iv_use *use;
5135
5136   /* Get the initial solution.  */
5137   set = get_initial_solution (data);
5138   if (!set)
5139     {
5140       if (dump_file && (dump_flags & TDF_DETAILS))
5141         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5142       return NULL;
5143     }
5144
5145   if (dump_file && (dump_flags & TDF_DETAILS))
5146     {
5147       fprintf (dump_file, "Initial set of candidates:\n");
5148       iv_ca_dump (data, dump_file, set);
5149     }
5150
5151   while (try_improve_iv_set (data, set))
5152     {
5153       if (dump_file && (dump_flags & TDF_DETAILS))
5154         {
5155           fprintf (dump_file, "Improved to:\n");
5156           iv_ca_dump (data, dump_file, set);
5157         }
5158     }
5159
5160   if (dump_file && (dump_flags & TDF_DETAILS))
5161     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5162
5163   for (i = 0; i < n_iv_uses (data); i++)
5164     {
5165       use = iv_use (data, i);
5166       use->selected = iv_ca_cand_for_use (set, use)->cand;
5167     }
5168
5169   return set;
5170 }
5171
5172 /* Creates a new induction variable corresponding to CAND.  */
5173
5174 static void
5175 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5176 {
5177   block_stmt_iterator incr_pos;
5178   tree base;
5179   bool after = false;
5180
5181   if (!cand->iv)
5182     return;
5183
5184   switch (cand->pos)
5185     {
5186     case IP_NORMAL:
5187       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5188       break;
5189
5190     case IP_END:
5191       incr_pos = bsi_last (ip_end_pos (data->current_loop));
5192       after = true;
5193       break;
5194
5195     case IP_ORIGINAL:
5196       /* Mark that the iv is preserved.  */
5197       name_info (data, cand->var_before)->preserve_biv = true;
5198       name_info (data, cand->var_after)->preserve_biv = true;
5199
5200       /* Rewrite the increment so that it uses var_before directly.  */
5201       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5202
5203       return;
5204     }
5205
5206   gimple_add_tmp_var (cand->var_before);
5207   add_referenced_tmp_var (cand->var_before);
5208
5209   base = unshare_expr (cand->iv->base);
5210
5211   create_iv (base, unshare_expr (cand->iv->step),
5212              cand->var_before, data->current_loop,
5213              &incr_pos, after, &cand->var_before, &cand->var_after);
5214 }
5215
5216 /* Creates new induction variables described in SET.  */
5217
5218 static void
5219 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5220 {
5221   unsigned i;
5222   struct iv_cand *cand;
5223   bitmap_iterator bi;
5224
5225   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5226     {
5227       cand = iv_cand (data, i);
5228       create_new_iv (data, cand);
5229     }
5230 }
5231
5232 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5233    is true, remove also the ssa name defined by the statement.  */
5234
5235 static void
5236 remove_statement (tree stmt, bool including_defined_name)
5237 {
5238   if (TREE_CODE (stmt) == PHI_NODE)
5239     {
5240       if (!including_defined_name)
5241         {
5242           /* Prevent the ssa name defined by the statement from being removed.  */
5243           SET_PHI_RESULT (stmt, NULL);
5244         }
5245       remove_phi_node (stmt, NULL_TREE);
5246     }
5247   else
5248     {
5249       block_stmt_iterator bsi = bsi_for_stmt (stmt);
5250
5251       bsi_remove (&bsi);
5252     }
5253 }
5254
5255 /* Rewrites USE (definition of iv used in a nonlinear expression)
5256    using candidate CAND.  */
5257
5258 static void
5259 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5260                             struct iv_use *use, struct iv_cand *cand)
5261 {
5262   tree comp;
5263   tree op, stmts, tgt, ass;
5264   block_stmt_iterator bsi, pbsi;
5265
5266   /* An important special case -- if we are asked to express value of
5267      the original iv by itself, just exit; there is no need to
5268      introduce a new computation (that might also need casting the
5269      variable to unsigned and back).  */
5270   if (cand->pos == IP_ORIGINAL
5271       && TREE_CODE (use->stmt) == MODIFY_EXPR
5272       && TREE_OPERAND (use->stmt, 0) == cand->var_after)
5273     {
5274       op = TREE_OPERAND (use->stmt, 1);
5275
5276       /* Be a bit careful.  In case variable is expressed in some
5277          complicated way, rewrite it so that we may get rid of this
5278          complicated expression.  */
5279       if ((TREE_CODE (op) == PLUS_EXPR
5280            || TREE_CODE (op) == MINUS_EXPR)
5281           && TREE_OPERAND (op, 0) == cand->var_before
5282           && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
5283         return;
5284     }
5285
5286   comp = unshare_expr (get_computation (data->current_loop,
5287                                         use, cand));
5288   switch (TREE_CODE (use->stmt))
5289     {
5290     case PHI_NODE:
5291       tgt = PHI_RESULT (use->stmt);
5292
5293       /* If we should keep the biv, do not replace it.  */
5294       if (name_info (data, tgt)->preserve_biv)
5295         return;
5296
5297       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5298       while (!bsi_end_p (pbsi)
5299              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5300         {
5301           bsi = pbsi;
5302           bsi_next (&pbsi);
5303         }
5304       break;
5305
5306     case MODIFY_EXPR:
5307       tgt = TREE_OPERAND (use->stmt, 0);
5308       bsi = bsi_for_stmt (use->stmt);
5309       break;
5310
5311     default:
5312       gcc_unreachable ();
5313     }
5314
5315   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5316
5317   if (TREE_CODE (use->stmt) == PHI_NODE)
5318     {
5319       if (stmts)
5320         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5321       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5322       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5323       remove_statement (use->stmt, false);
5324       SSA_NAME_DEF_STMT (tgt) = ass;
5325     }
5326   else
5327     {
5328       if (stmts)
5329         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5330       TREE_OPERAND (use->stmt, 1) = op;
5331     }
5332 }
5333
5334 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5335    for_each_index.  */
5336
5337 static bool
5338 idx_remove_ssa_names (tree base, tree *idx,
5339                       void *data ATTRIBUTE_UNUSED)
5340 {
5341   tree *op;
5342
5343   if (TREE_CODE (*idx) == SSA_NAME)
5344     *idx = SSA_NAME_VAR (*idx);
5345
5346   if (TREE_CODE (base) == ARRAY_REF)
5347     {
5348       op = &TREE_OPERAND (base, 2);
5349       if (*op
5350           && TREE_CODE (*op) == SSA_NAME)
5351         *op = SSA_NAME_VAR (*op);
5352       op = &TREE_OPERAND (base, 3);
5353       if (*op
5354           && TREE_CODE (*op) == SSA_NAME)
5355         *op = SSA_NAME_VAR (*op);
5356     }
5357
5358   return true;
5359 }
5360
5361 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5362
5363 static tree
5364 unshare_and_remove_ssa_names (tree ref)
5365 {
5366   ref = unshare_expr (ref);
5367   for_each_index (&ref, idx_remove_ssa_names, NULL);
5368
5369   return ref;
5370 }
5371
5372 /* Rewrites base of memory access OP with expression WITH in statement
5373    pointed to by BSI.  */
5374
5375 static void
5376 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
5377 {
5378   tree bvar, var, new_name, copy, name;
5379   tree orig;
5380
5381   var = bvar = get_base_address (*op);
5382
5383   if (!var || TREE_CODE (with) != SSA_NAME)
5384     goto do_rewrite;
5385
5386   gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
5387   gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
5388   if (TREE_CODE (var) == INDIRECT_REF)
5389     var = TREE_OPERAND (var, 0);
5390   if (TREE_CODE (var) == SSA_NAME)
5391     {
5392       name = var;
5393       var = SSA_NAME_VAR (var);
5394     }
5395   else if (DECL_P (var))
5396     name = NULL_TREE;
5397   else
5398     goto do_rewrite;
5399
5400   /* We need to add a memory tag for the variable.  But we do not want
5401      to add it to the temporary used for the computations, since this leads
5402      to problems in redundancy elimination when there are common parts
5403      in two computations referring to the different arrays.  So we copy
5404      the variable to a new temporary.  */
5405   copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
5406
5407   if (name)
5408     new_name = duplicate_ssa_name (name, copy);
5409   else
5410     {
5411       tree tag = var_ann (var)->type_mem_tag;
5412       tree new_ptr = create_tmp_var (TREE_TYPE (with), "ruatmp");
5413       add_referenced_tmp_var (new_ptr);
5414       if (tag)
5415         var_ann (new_ptr)->type_mem_tag = tag;
5416       else
5417         add_type_alias (new_ptr, var);
5418       new_name = make_ssa_name (new_ptr, copy);
5419     }
5420
5421   TREE_OPERAND (copy, 0) = new_name;
5422   bsi_insert_before (bsi, copy, BSI_SAME_STMT);
5423   with = new_name;
5424
5425 do_rewrite:
5426
5427   orig = NULL_TREE;
5428   gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
5429   gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
5430
5431   if (TREE_CODE (*op) == INDIRECT_REF)
5432     orig = REF_ORIGINAL (*op);
5433   if (!orig)
5434     orig = unshare_and_remove_ssa_names (*op);
5435
5436   *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
5437
5438   /* Record the original reference, for purposes of alias analysis.  */
5439   REF_ORIGINAL (*op) = orig;
5440
5441   /* Virtual operands in the original statement may have to be renamed
5442      because of the replacement.  */
5443   mark_new_vars_to_rename (bsi_stmt (*bsi));
5444 }
5445
5446 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5447
5448 static void
5449 rewrite_use_address (struct ivopts_data *data,
5450                      struct iv_use *use, struct iv_cand *cand)
5451 {
5452   tree comp = unshare_expr (get_computation (data->current_loop,
5453                                              use, cand));
5454   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5455   tree stmts;
5456   tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
5457
5458   if (stmts)
5459     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5460
5461   rewrite_address_base (&bsi, use->op_p, op);
5462 }
5463
5464 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5465    candidate CAND.  */
5466
5467 static void
5468 rewrite_use_compare (struct ivopts_data *data,
5469                      struct iv_use *use, struct iv_cand *cand)
5470 {
5471   tree comp;
5472   tree *op_p, cond, op, stmts, bound;
5473   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5474   enum tree_code compare;
5475   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5476
5477   bound = cp->value;
5478   if (bound)
5479     {
5480       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5481       tree var_type = TREE_TYPE (var);
5482
5483       compare = iv_elimination_compare (data, use);
5484       bound = fold_convert (var_type, bound);
5485       op = force_gimple_operand (unshare_expr (bound), &stmts,
5486                                  true, NULL_TREE);
5487
5488       if (stmts)
5489         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5490
5491       *use->op_p = build2 (compare, boolean_type_node, var, op);
5492       update_stmt (use->stmt);
5493       return;
5494     }
5495
5496   /* The induction variable elimination failed; just express the original
5497      giv.  */
5498   comp = unshare_expr (get_computation (data->current_loop, use, cand));
5499
5500   cond = *use->op_p;
5501   op_p = &TREE_OPERAND (cond, 0);
5502   if (TREE_CODE (*op_p) != SSA_NAME
5503       || zero_p (get_iv (data, *op_p)->step))
5504     op_p = &TREE_OPERAND (cond, 1);
5505
5506   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5507   if (stmts)
5508     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5509
5510   *op_p = op;
5511 }
5512
5513 /* Ensure that operand *OP_P may be used at the end of EXIT without
5514    violating loop closed ssa form.  */
5515
5516 static void
5517 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5518 {
5519   basic_block def_bb;
5520   struct loop *def_loop;
5521   tree phi, use;
5522
5523   use = USE_FROM_PTR (op_p);
5524   if (TREE_CODE (use) != SSA_NAME)
5525     return;
5526
5527   def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5528   if (!def_bb)
5529     return;
5530
5531   def_loop = def_bb->loop_father;
5532   if (flow_bb_inside_loop_p (def_loop, exit->dest))
5533     return;
5534
5535   /* Try finding a phi node that copies the value out of the loop.  */
5536   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5537     if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5538       break;
5539
5540   if (!phi)
5541     {
5542       /* Create such a phi node.  */
5543       tree new_name = duplicate_ssa_name (use, NULL);
5544
5545       phi = create_phi_node (new_name, exit->dest);
5546       SSA_NAME_DEF_STMT (new_name) = phi;
5547       add_phi_arg (phi, use, exit);
5548     }
5549
5550   SET_USE (op_p, PHI_RESULT (phi));
5551 }
5552
5553 /* Ensure that operands of STMT may be used at the end of EXIT without
5554    violating loop closed ssa form.  */
5555
5556 static void
5557 protect_loop_closed_ssa_form (edge exit, tree stmt)
5558 {
5559   ssa_op_iter iter;
5560   use_operand_p use_p;
5561
5562   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5563     protect_loop_closed_ssa_form_use (exit, use_p);
5564 }
5565
5566 /* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange things
5567    so that they are emitted on the correct place, and so that the loop closed
5568    ssa form is preserved.  */
5569
5570 static void
5571 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5572 {
5573   tree_stmt_iterator tsi;
5574   block_stmt_iterator bsi;
5575   tree phi, stmt, def, next;
5576
5577   if (!single_pred_p (exit->dest))
5578     split_loop_exit_edge (exit);
5579
5580   /* Ensure there is label in exit->dest, so that we can
5581      insert after it.  */
5582   tree_block_label (exit->dest);
5583   bsi = bsi_after_labels (exit->dest);
5584
5585   if (TREE_CODE (stmts) == STATEMENT_LIST)
5586     {
5587       for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5588         {
5589           bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5590           protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5591         }
5592     }
5593   else
5594     {
5595       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5596       protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5597     }
5598
5599   if (!op)
5600     return;
5601
5602   for (phi = phi_nodes (exit->dest); phi; phi = next)
5603     {
5604       next = PHI_CHAIN (phi);
5605
5606       if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5607         {
5608           def = PHI_RESULT (phi);
5609           remove_statement (phi, false);
5610           stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5611                         def, op);
5612           SSA_NAME_DEF_STMT (def) = stmt;
5613           bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5614         }
5615     }
5616 }
5617
5618 /* Rewrites the final value of USE (that is only needed outside of the loop)
5619    using candidate CAND.  */
5620
5621 static void
5622 rewrite_use_outer (struct ivopts_data *data,
5623                    struct iv_use *use, struct iv_cand *cand)
5624 {
5625   edge exit;
5626   tree value, op, stmts, tgt;
5627   tree phi;
5628
5629   switch (TREE_CODE (use->stmt))
5630     {
5631     case PHI_NODE:
5632       tgt = PHI_RESULT (use->stmt);
5633       break;
5634     case MODIFY_EXPR:
5635       tgt = TREE_OPERAND (use->stmt, 0);
5636       break;
5637     default:
5638       gcc_unreachable ();
5639     }
5640
5641   exit = single_dom_exit (data->current_loop);
5642
5643   if (exit)
5644     {
5645       if (!cand->iv)
5646         {
5647           struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5648           value = cp->value;
5649         }
5650       else
5651         value = get_computation_at (data->current_loop,
5652                                     use, cand, last_stmt (exit->src));
5653
5654       value = unshare_expr (value);
5655       op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5656
5657       /* If we will preserve the iv anyway and we would need to perform
5658          some computation to replace the final value, do nothing.  */
5659       if (stmts && name_info (data, tgt)->preserve_biv)
5660         return;
5661
5662       for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5663         {
5664           use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5665
5666           if (USE_FROM_PTR (use_p) == tgt)
5667             SET_USE (use_p, op);
5668         }
5669
5670       if (stmts)
5671         compute_phi_arg_on_exit (exit, stmts, op);
5672
5673       /* Enable removal of the statement.  We cannot remove it directly,
5674          since we may still need the aliasing information attached to the
5675          ssa name defined by it.  */
5676       name_info (data, tgt)->iv->have_use_for = false;
5677       return;
5678     }
5679
5680   /* If the variable is going to be preserved anyway, there is nothing to
5681      do.  */
5682   if (name_info (data, tgt)->preserve_biv)
5683     return;
5684
5685   /* Otherwise we just need to compute the iv.  */
5686   rewrite_use_nonlinear_expr (data, use, cand);
5687 }
5688
5689 /* Rewrites USE using candidate CAND.  */
5690
5691 static void
5692 rewrite_use (struct ivopts_data *data,
5693              struct iv_use *use, struct iv_cand *cand)
5694 {
5695   switch (use->type)
5696     {
5697       case USE_NONLINEAR_EXPR:
5698         rewrite_use_nonlinear_expr (data, use, cand);
5699         break;
5700
5701       case USE_OUTER:
5702         rewrite_use_outer (data, use, cand);
5703         break;
5704
5705       case USE_ADDRESS:
5706         rewrite_use_address (data, use, cand);
5707         break;
5708
5709       case USE_COMPARE:
5710         rewrite_use_compare (data, use, cand);
5711         break;
5712
5713       default:
5714         gcc_unreachable ();
5715     }
5716   update_stmt (use->stmt);
5717 }
5718
5719 /* Rewrite the uses using the selected induction variables.  */
5720
5721 static void
5722 rewrite_uses (struct ivopts_data *data)
5723 {
5724   unsigned i;
5725   struct iv_cand *cand;
5726   struct iv_use *use;
5727
5728   for (i = 0; i < n_iv_uses (data); i++)
5729     {
5730       use = iv_use (data, i);
5731       cand = use->selected;
5732       gcc_assert (cand);
5733
5734       rewrite_use (data, use, cand);
5735     }
5736 }
5737
5738 /* Removes the ivs that are not used after rewriting.  */
5739
5740 static void
5741 remove_unused_ivs (struct ivopts_data *data)
5742 {
5743   unsigned j;
5744   bitmap_iterator bi;
5745
5746   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5747     {
5748       struct version_info *info;
5749
5750       info = ver_info (data, j);
5751       if (info->iv
5752           && !zero_p (info->iv->step)
5753           && !info->inv_id
5754           && !info->iv->have_use_for
5755           && !info->preserve_biv)
5756         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5757     }
5758 }
5759
5760 /* Frees data allocated by the optimization of a single loop.  */
5761
5762 static void
5763 free_loop_data (struct ivopts_data *data)
5764 {
5765   unsigned i, j;
5766   bitmap_iterator bi;
5767   tree obj;
5768
5769   htab_empty (data->niters);
5770
5771   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5772     {
5773       struct version_info *info;
5774
5775       info = ver_info (data, i);
5776       if (info->iv)
5777         free (info->iv);
5778       info->iv = NULL;
5779       info->has_nonlin_use = false;
5780       info->preserve_biv = false;
5781       info->inv_id = 0;
5782     }
5783   bitmap_clear (data->relevant);
5784   bitmap_clear (data->important_candidates);
5785
5786   for (i = 0; i < n_iv_uses (data); i++)
5787     {
5788       struct iv_use *use = iv_use (data, i);
5789
5790       free (use->iv);
5791       BITMAP_FREE (use->related_cands);
5792       for (j = 0; j < use->n_map_members; j++)
5793         if (use->cost_map[j].depends_on)
5794           BITMAP_FREE (use->cost_map[j].depends_on);
5795       free (use->cost_map);
5796       free (use);
5797     }
5798   VEC_truncate (iv_use_p, data->iv_uses, 0);
5799
5800   for (i = 0; i < n_iv_cands (data); i++)
5801     {
5802       struct iv_cand *cand = iv_cand (data, i);
5803
5804       if (cand->iv)
5805         free (cand->iv);
5806       if (cand->depends_on)
5807         BITMAP_FREE (cand->depends_on);
5808       free (cand);
5809     }
5810   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5811
5812   if (data->version_info_size < num_ssa_names)
5813     {
5814       data->version_info_size = 2 * num_ssa_names;
5815       free (data->version_info);
5816       data->version_info = xcalloc (data->version_info_size,
5817                                     sizeof (struct version_info));
5818     }
5819
5820   data->max_inv_id = 0;
5821
5822   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5823     SET_DECL_RTL (obj, NULL_RTX);
5824
5825   VEC_truncate (tree, decl_rtl_to_reset, 0);
5826 }
5827
5828 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5829    loop tree.  */
5830
5831 static void
5832 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5833 {
5834   unsigned i;
5835
5836   for (i = 1; i < loops->num; i++)
5837     if (loops->parray[i])
5838       {
5839         free (loops->parray[i]->aux);
5840         loops->parray[i]->aux = NULL;
5841       }
5842
5843   free_loop_data (data);
5844   free (data->version_info);
5845   BITMAP_FREE (data->relevant);
5846   BITMAP_FREE (data->important_candidates);
5847   htab_delete (data->niters);
5848
5849   VEC_free (tree, heap, decl_rtl_to_reset);
5850   VEC_free (iv_use_p, heap, data->iv_uses);
5851   VEC_free (iv_cand_p, heap, data->iv_candidates);
5852 }
5853
5854 /* Optimizes the LOOP.  Returns true if anything changed.  */
5855
5856 static bool
5857 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5858 {
5859   bool changed = false;
5860   struct iv_ca *iv_ca;
5861   edge exit;
5862
5863   data->current_loop = loop;
5864
5865   if (dump_file && (dump_flags & TDF_DETAILS))
5866     {
5867       fprintf (dump_file, "Processing loop %d\n", loop->num);
5868
5869       exit = single_dom_exit (loop);
5870       if (exit)
5871         {
5872           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5873                    exit->src->index, exit->dest->index);
5874           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5875           fprintf (dump_file, "\n");
5876         }
5877
5878       fprintf (dump_file, "\n");
5879     }
5880
5881   /* For each ssa name determines whether it behaves as an induction variable
5882      in some loop.  */
5883   if (!find_induction_variables (data))
5884     goto finish;
5885
5886   /* Finds interesting uses (item 1).  */
5887   find_interesting_uses (data);
5888   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5889     goto finish;
5890
5891   /* Finds candidates for the induction variables (item 2).  */
5892   find_iv_candidates (data);
5893
5894   /* Calculates the costs (item 3, part 1).  */
5895   determine_use_iv_costs (data);
5896   determine_iv_costs (data);
5897   determine_set_costs (data);
5898
5899   /* Find the optimal set of induction variables (item 3, part 2).  */
5900   iv_ca = find_optimal_iv_set (data);
5901   if (!iv_ca)
5902     goto finish;
5903   changed = true;
5904
5905   /* Create the new induction variables (item 4, part 1).  */
5906   create_new_ivs (data, iv_ca);
5907   iv_ca_free (&iv_ca);
5908
5909   /* Rewrite the uses (item 4, part 2).  */
5910   rewrite_uses (data);
5911
5912   /* Remove the ivs that are unused after rewriting.  */
5913   remove_unused_ivs (data);
5914
5915   /* We have changed the structure of induction variables; it might happen
5916      that definitions in the scev database refer to some of them that were
5917      eliminated.  */
5918   scev_reset ();
5919
5920 finish:
5921   free_loop_data (data);
5922
5923   return changed;
5924 }
5925
5926 /* Main entry point.  Optimizes induction variables in LOOPS.  */
5927
5928 void
5929 tree_ssa_iv_optimize (struct loops *loops)
5930 {
5931   struct loop *loop;
5932   struct ivopts_data data;
5933
5934   tree_ssa_iv_optimize_init (loops, &data);
5935
5936   /* Optimize the loops starting with the innermost ones.  */
5937   loop = loops->tree_root;
5938   while (loop->inner)
5939     loop = loop->inner;
5940
5941   /* Scan the loops, inner ones first.  */
5942   while (loop != loops->tree_root)
5943     {
5944       if (dump_file && (dump_flags & TDF_DETAILS))
5945         flow_loop_dump (loop, dump_file, NULL, 1);
5946
5947       tree_ssa_iv_optimize_loop (&data, loop);
5948
5949       if (loop->next)
5950         {
5951           loop = loop->next;
5952           while (loop->inner)
5953             loop = loop->inner;
5954         }
5955       else
5956         loop = loop->outer;
5957     }
5958
5959   /* FIXME.  IV opts introduces new aliases and call-clobbered
5960      variables, which need to be renamed.  However, when we call the
5961      renamer, not all statements will be scanned for operands.  In
5962      particular, the newly introduced aliases may appear in statements
5963      that are considered "unmodified", so the renamer will not get a
5964      chance to rename those operands.
5965
5966      Work around this problem by forcing an operand re-scan on every
5967      statement.  This will not be necessary once the new operand
5968      scanner is implemented.  */
5969   if (need_ssa_update_p ())
5970     {
5971       basic_block bb;
5972       block_stmt_iterator si;
5973       FOR_EACH_BB (bb)
5974         for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5975           update_stmt (bsi_stmt (si));
5976     }
5977
5978   rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
5979   tree_ssa_iv_optimize_finalize (loops, &data);
5980 }