gcc/tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does so in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variable's value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible; it might give better performance and might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
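/* As an illustration of these steps (a hypothetical example, not a
   transformation hard-wired below): for a loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   step 1 finds the address use a[i] and the compare use i < n, step 2
   proposes candidates such as i itself and a pointer running over a,
   and steps 3 and 4 may rewrite the loop to

     for (p = &a[0]; p != &a[n]; p++)
       sum += *p;

   eliminating the counter i when it has no other uses.  */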
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
97 this. */
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
102 struct iv
104 tree base; /* Initial value of the iv. */
105 tree base_object; /* A memory object to which the induction variable points. */
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier of that use, if have_use_for is true. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
114 struct version_info
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
124 /* Information attached to a loop. */
125 struct loop_data
127 unsigned regs_used; /* Number of registers used. */
130 /* Types of uses. */
131 enum use_type
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_ADDRESS, /* Use in an address. */
135 USE_COMPARE /* Use is a compare. */
138 /* The candidate - cost pair. */
139 struct cost_pair
141 struct iv_cand *cand; /* The candidate. */
142 unsigned cost; /* The cost. */
143 bitmap depends_on; /* The list of invariants that have to be
144 preserved. */
145 tree value; /* For final value elimination, the expression for
146 the final value of the iv. For iv elimination,
147 the new bound to compare with. */
150 /* Use. */
151 struct iv_use
153 unsigned id; /* The id of the use. */
154 enum use_type type; /* Type of the use. */
155 struct iv *iv; /* The induction variable it is based on. */
156 tree stmt; /* Statement in which it occurs. */
157 tree *op_p; /* The place where it occurs. */
158 bitmap related_cands; /* The set of "related" iv candidates, plus the common
159 important ones. */
161 unsigned n_map_members; /* Number of candidates in the cost_map list. */
162 struct cost_pair *cost_map;
163 /* The costs w.r.t. the iv candidates. */
165 struct iv_cand *selected;
166 /* The selected candidate. */
169 /* The position where the iv is computed. */
170 enum iv_position
172 IP_NORMAL, /* At the end, just before the exit condition. */
173 IP_END, /* At the end of the latch block. */
174 IP_ORIGINAL /* The original biv. */
177 /* The induction variable candidate. */
178 struct iv_cand
180 unsigned id; /* The number of the candidate. */
181 bool important; /* Whether this is an "important" candidate, i.e. such
182 that it should be considered by all uses. */
183 enum iv_position pos; /* Where it is computed. */
184 tree incremented_at; /* For original biv, the statement where it is
185 incremented. */
186 tree var_before; /* The variable used for it before increment. */
187 tree var_after; /* The variable used for it after increment. */
188 struct iv *iv; /* The value of the candidate. NULL for
189 "pseudocandidate" used to indicate the possibility
190 to replace the final value of an iv by direct
191 computation of the value. */
192 unsigned cost; /* Cost of the candidate. */
193 bitmap depends_on; /* The list of invariants that are used in step of the
194 biv. */
197 /* The data used by the induction variable optimizations. */
199 typedef struct iv_use *iv_use_p;
200 DEF_VEC_P(iv_use_p);
201 DEF_VEC_ALLOC_P(iv_use_p,heap);
203 typedef struct iv_cand *iv_cand_p;
204 DEF_VEC_P(iv_cand_p);
205 DEF_VEC_ALLOC_P(iv_cand_p,heap);
207 struct ivopts_data
209 /* The currently optimized loop. */
210 struct loop *current_loop;
212 /* Numbers of iterations for all exits of the current loop. */
213 htab_t niters;
215 /* The size of version_info array allocated. */
216 unsigned version_info_size;
218 /* The array of information for the ssa names. */
219 struct version_info *version_info;
221 /* The bitmap of indices in version_info whose value was changed. */
222 bitmap relevant;
224 /* The maximum invariant id. */
225 unsigned max_inv_id;
227 /* The uses of induction variables. */
228 VEC(iv_use_p,heap) *iv_uses;
230 /* The candidates. */
231 VEC(iv_cand_p,heap) *iv_candidates;
233 /* A bitmap of important candidates. */
234 bitmap important_candidates;
236 /* Whether to consider just related and important candidates when replacing a
237 use. */
238 bool consider_all_candidates;
241 /* An assignment of iv candidates to uses. */
243 struct iv_ca
245 /* The number of uses covered by the assignment. */
246 unsigned upto;
248 /* Number of uses that cannot be expressed by the candidates in the set. */
249 unsigned bad_uses;
251 /* Candidate assigned to a use, together with the related costs. */
252 struct cost_pair **cand_for_use;
254 /* Number of times each candidate is used. */
255 unsigned *n_cand_uses;
257 /* The candidates used. */
258 bitmap cands;
260 /* The number of candidates in the set. */
261 unsigned n_cands;
263 /* Total number of registers needed. */
264 unsigned n_regs;
266 /* Total cost of expressing uses. */
267 unsigned cand_use_cost;
269 /* Total cost of candidates. */
270 unsigned cand_cost;
272 /* Number of times each invariant is used. */
273 unsigned *n_invariant_uses;
275 /* Total cost of the assignment. */
276 unsigned cost;
279 /* Difference of two iv candidate assignments. */
281 struct iv_ca_delta
283 /* Changed use. */
284 struct iv_use *use;
286 /* An old assignment (for rollback purposes). */
287 struct cost_pair *old_cp;
289 /* A new assignment. */
290 struct cost_pair *new_cp;
292 /* Next change in the list. */
293 struct iv_ca_delta *next_change;
296 /* Bound on the number of candidates below which all candidates are considered. */
298 #define CONSIDER_ALL_CANDIDATES_BOUND \
299 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
301 /* If there are more iv occurrences than this, we just give up (it is quite
302 unlikely that optimizing such a loop would help, and it would take ages). */
304 #define MAX_CONSIDERED_USES \
305 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
307 /* If there are at most this number of ivs in the set, always try removing
308 unnecessary ivs from the set. */
310 #define ALWAYS_PRUNE_CAND_SET_BOUND \
311 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
313 /* The list of trees for which the decl_rtl field must be reset is stored
314 here. */
316 static VEC(tree,heap) *decl_rtl_to_reset;
318 /* Number of uses recorded in DATA. */
320 static inline unsigned
321 n_iv_uses (struct ivopts_data *data)
323 return VEC_length (iv_use_p, data->iv_uses);
326 /* Ith use recorded in DATA. */
328 static inline struct iv_use *
329 iv_use (struct ivopts_data *data, unsigned i)
331 return VEC_index (iv_use_p, data->iv_uses, i);
334 /* Number of candidates recorded in DATA. */
336 static inline unsigned
337 n_iv_cands (struct ivopts_data *data)
339 return VEC_length (iv_cand_p, data->iv_candidates);
342 /* Ith candidate recorded in DATA. */
344 static inline struct iv_cand *
345 iv_cand (struct ivopts_data *data, unsigned i)
347 return VEC_index (iv_cand_p, data->iv_candidates, i);
350 /* The data for LOOP. */
352 static inline struct loop_data *
353 loop_data (struct loop *loop)
355 return loop->aux;
358 /* The single loop exit if it dominates the latch, NULL otherwise. */
360 edge
361 single_dom_exit (struct loop *loop)
363 edge exit = loop->single_exit;
365 if (!exit)
366 return NULL;
368 if (!just_once_each_iteration_p (loop, exit->src))
369 return NULL;
371 return exit;
374 /* Dumps information about the induction variable IV to FILE. */
376 extern void dump_iv (FILE *, struct iv *);
377 void
378 dump_iv (FILE *file, struct iv *iv)
380 if (iv->ssa_name)
382 fprintf (file, "ssa name ");
383 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
384 fprintf (file, "\n");
387 fprintf (file, " type ");
388 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
389 fprintf (file, "\n");
391 if (iv->step)
393 fprintf (file, " base ");
394 print_generic_expr (file, iv->base, TDF_SLIM);
395 fprintf (file, "\n");
397 fprintf (file, " step ");
398 print_generic_expr (file, iv->step, TDF_SLIM);
399 fprintf (file, "\n");
401 else
403 fprintf (file, " invariant ");
404 print_generic_expr (file, iv->base, TDF_SLIM);
405 fprintf (file, "\n");
408 if (iv->base_object)
410 fprintf (file, " base object ");
411 print_generic_expr (file, iv->base_object, TDF_SLIM);
412 fprintf (file, "\n");
415 if (iv->biv_p)
416 fprintf (file, " is a biv\n");
419 /* Dumps information about the USE to FILE. */
421 extern void dump_use (FILE *, struct iv_use *);
422 void
423 dump_use (FILE *file, struct iv_use *use)
425 fprintf (file, "use %d\n", use->id);
427 switch (use->type)
429 case USE_NONLINEAR_EXPR:
430 fprintf (file, " generic\n");
431 break;
433 case USE_ADDRESS:
434 fprintf (file, " address\n");
435 break;
437 case USE_COMPARE:
438 fprintf (file, " compare\n");
439 break;
441 default:
442 gcc_unreachable ();
445 fprintf (file, " in statement ");
446 print_generic_expr (file, use->stmt, TDF_SLIM);
447 fprintf (file, "\n");
449 fprintf (file, " at position ");
450 if (use->op_p)
451 print_generic_expr (file, *use->op_p, TDF_SLIM);
452 fprintf (file, "\n");
454 dump_iv (file, use->iv);
456 if (use->related_cands)
458 fprintf (file, " related candidates ");
459 dump_bitmap (file, use->related_cands);
463 /* Dumps information about the uses to FILE. */
465 extern void dump_uses (FILE *, struct ivopts_data *);
466 void
467 dump_uses (FILE *file, struct ivopts_data *data)
469 unsigned i;
470 struct iv_use *use;
472 for (i = 0; i < n_iv_uses (data); i++)
474 use = iv_use (data, i);
476 dump_use (file, use);
477 fprintf (file, "\n");
481 /* Dumps information about induction variable candidate CAND to FILE. */
483 extern void dump_cand (FILE *, struct iv_cand *);
484 void
485 dump_cand (FILE *file, struct iv_cand *cand)
487 struct iv *iv = cand->iv;
489 fprintf (file, "candidate %d%s\n",
490 cand->id, cand->important ? " (important)" : "");
492 if (cand->depends_on)
494 fprintf (file, " depends on ");
495 dump_bitmap (file, cand->depends_on);
498 if (!iv)
500 fprintf (file, " final value replacement\n");
501 return;
504 switch (cand->pos)
506 case IP_NORMAL:
507 fprintf (file, " incremented before exit test\n");
508 break;
510 case IP_END:
511 fprintf (file, " incremented at end\n");
512 break;
514 case IP_ORIGINAL:
515 fprintf (file, " original biv\n");
516 break;
519 dump_iv (file, iv);
522 /* Returns the info for ssa version VER. */
524 static inline struct version_info *
525 ver_info (struct ivopts_data *data, unsigned ver)
527 return data->version_info + ver;
530 /* Returns the info for ssa name NAME. */
532 static inline struct version_info *
533 name_info (struct ivopts_data *data, tree name)
535 return ver_info (data, SSA_NAME_VERSION (name));
538 /* Checks whether there exists a number X such that X * B = A, counting modulo
539 2^BITS. */
541 static bool
542 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
543 HOST_WIDE_INT *x)
545 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
546 unsigned HOST_WIDE_INT inv, ex, val;
547 unsigned i;
549 a &= mask;
550 b &= mask;
552 /* First divide the whole equation by 2 as long as possible. */
553 while (!(a & 1) && !(b & 1))
555 a >>= 1;
556 b >>= 1;
557 bits--;
558 mask >>= 1;
561 if (!(b & 1))
563 /* If b is still even, a is odd and there is no such x. */
564 return false;
567 /* Find the inverse of b. We compute it as
568 b^(2^(bits - 1) - 1) (mod 2^bits). */
569 inv = 1;
570 ex = b;
571 for (i = 0; i < bits - 1; i++)
573 inv = (inv * ex) & mask;
574 ex = (ex * ex) & mask;
577 val = (a * inv) & mask;
579 gcc_assert (((val * b) & mask) == a);
581 if ((val >> (bits - 1)) & 1)
582 val |= ~mask;
584 *x = val;
586 return true;
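/* A worked example of the computation above (illustrative numbers): with
   BITS = 4, A = 3 and B = 5, the mask is 15 and b is already odd.  The
   inverse of b is b^(2^3 - 1) = 5^7 = 13 (mod 16), since 5 * 13 = 65
   = 4 * 16 + 1.  Then val = (3 * 13) & 15 = 7, and indeed
   (7 * 5) & 15 = 3, so *X is set to 7.  */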
589 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
590 emitted in LOOP. */
592 static bool
593 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
595 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
597 gcc_assert (bb);
599 if (sbb == loop->latch)
600 return true;
602 if (sbb != bb)
603 return false;
605 return stmt == last_stmt (bb);
608 /* Returns true if STMT is after the place where the original induction
609 variable CAND is incremented. */
611 static bool
612 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
614 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
615 basic_block stmt_bb = bb_for_stmt (stmt);
616 block_stmt_iterator bsi;
618 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
619 return false;
621 if (stmt_bb != cand_bb)
622 return true;
624 /* Scan the block from the end, since the original ivs are usually
625 incremented at the end of the loop body. */
626 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
628 if (bsi_stmt (bsi) == cand->incremented_at)
629 return false;
630 if (bsi_stmt (bsi) == stmt)
631 return true;
635 /* Returns true if STMT is after the place where the induction variable
636 CAND is incremented in LOOP. */
638 static bool
639 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
641 switch (cand->pos)
643 case IP_END:
644 return false;
646 case IP_NORMAL:
647 return stmt_after_ip_normal_pos (loop, stmt);
649 case IP_ORIGINAL:
650 return stmt_after_ip_original_pos (cand, stmt);
652 default:
653 gcc_unreachable ();
657 /* Element of the table in which we cache the numbers of iterations obtained
658 from exits of the loop. */
660 struct nfe_cache_elt
662 /* The edge for which the number of iterations is cached. */
663 edge exit;
665 /* True if the # of iterations was successfully determined. */
666 bool valid_p;
668 /* Description of # of iterations. */
669 struct tree_niter_desc niter;
672 /* Hash function for nfe_cache_elt E. */
674 static hashval_t
675 nfe_hash (const void *e)
677 const struct nfe_cache_elt *elt = e;
679 return htab_hash_pointer (elt->exit);
682 /* Equality function for nfe_cache_elt E1 and edge E2. */
684 static int
685 nfe_eq (const void *e1, const void *e2)
687 const struct nfe_cache_elt *elt1 = e1;
689 return elt1->exit == e2;
692 /* Returns structure describing number of iterations determined from
693 EXIT of DATA->current_loop, or NULL if something goes wrong. */
695 static struct tree_niter_desc *
696 niter_for_exit (struct ivopts_data *data, edge exit)
698 struct nfe_cache_elt *nfe_desc;
699 PTR *slot;
701 slot = htab_find_slot_with_hash (data->niters, exit,
702 htab_hash_pointer (exit),
703 INSERT);
705 if (!*slot)
707 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
708 nfe_desc->exit = exit;
709 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
710 exit, &nfe_desc->niter,
711 true);
712 *slot = nfe_desc;
714 else
715 nfe_desc = *slot;
717 if (!nfe_desc->valid_p)
718 return NULL;
720 return &nfe_desc->niter;
723 /* Returns structure describing number of iterations determined from
724 single dominating exit of DATA->current_loop, or NULL if something
725 goes wrong. */
727 static struct tree_niter_desc *
728 niter_for_single_dom_exit (struct ivopts_data *data)
730 edge exit = single_dom_exit (data->current_loop);
732 if (!exit)
733 return NULL;
735 return niter_for_exit (data, exit);
738 /* Initializes data structures used by the iv optimization pass, stored
739 in DATA. LOOPS is the loop tree. */
741 static void
742 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
744 unsigned i;
746 data->version_info_size = 2 * num_ssa_names;
747 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
748 data->relevant = BITMAP_ALLOC (NULL);
749 data->important_candidates = BITMAP_ALLOC (NULL);
750 data->max_inv_id = 0;
751 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
753 for (i = 1; i < loops->num; i++)
754 if (loops->parray[i])
755 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
757 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
758 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
759 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
762 /* Returns a memory object to which EXPR points. If we are able to
763 determine that it does not point to any such object, NULL is returned. */
765 static tree
766 determine_base_object (tree expr)
768 enum tree_code code = TREE_CODE (expr);
769 tree base, obj, op0, op1;
771 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
772 return NULL_TREE;
774 switch (code)
776 case INTEGER_CST:
777 return NULL_TREE;
779 case ADDR_EXPR:
780 obj = TREE_OPERAND (expr, 0);
781 base = get_base_address (obj);
783 if (!base)
784 return expr;
786 if (TREE_CODE (base) == INDIRECT_REF)
787 return determine_base_object (TREE_OPERAND (base, 0));
789 return fold_convert (ptr_type_node,
790 build_fold_addr_expr (base));
792 case PLUS_EXPR:
793 case MINUS_EXPR:
794 op0 = determine_base_object (TREE_OPERAND (expr, 0));
795 op1 = determine_base_object (TREE_OPERAND (expr, 1));
797 if (!op1)
798 return op0;
800 if (!op0)
801 return (code == PLUS_EXPR
802 ? op1
803 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
805 return fold_build2 (code, ptr_type_node, op0, op1);
807 case NOP_EXPR:
808 case CONVERT_EXPR:
809 return determine_base_object (TREE_OPERAND (expr, 0));
811 default:
812 return fold_convert (ptr_type_node, expr);
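/* For example (hypothetical inputs): for EXPR == &a[i] where a is a
   declared array, the ADDR_EXPR case returns &a (converted to void *);
   for EXPR == p + 4 * i where p is a pointer, only p yields a base, so
   p itself is returned; for an integer constant, NULL_TREE is
   returned.  */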
816 /* Allocates an induction variable with the given initial value BASE and
817 step STEP. */
819 static struct iv *
820 alloc_iv (tree base, tree step)
822 struct iv *iv = XCNEW (struct iv);
824 if (step && integer_zerop (step))
825 step = NULL_TREE;
827 iv->base = base;
828 iv->base_object = determine_base_object (base);
829 iv->step = step;
830 iv->biv_p = false;
831 iv->have_use_for = false;
832 iv->use_id = 0;
833 iv->ssa_name = NULL_TREE;
835 return iv;
838 /* Sets STEP and BASE for induction variable IV. */
840 static void
841 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
843 struct version_info *info = name_info (data, iv);
845 gcc_assert (!info->iv);
847 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
848 info->iv = alloc_iv (base, step);
849 info->iv->ssa_name = iv;
852 /* Finds induction variable declaration for VAR. */
854 static struct iv *
855 get_iv (struct ivopts_data *data, tree var)
857 basic_block bb;
859 if (!name_info (data, var)->iv)
861 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
863 if (!bb
864 || !flow_bb_inside_loop_p (data->current_loop, bb))
865 set_iv (data, var, var, NULL_TREE);
868 return name_info (data, var)->iv;
871 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
872 not define a simple affine biv with nonzero step. */
874 static tree
875 determine_biv_step (tree phi)
877 struct loop *loop = bb_for_stmt (phi)->loop_father;
878 tree name = PHI_RESULT (phi);
879 affine_iv iv;
881 if (!is_gimple_reg (name))
882 return NULL_TREE;
884 if (!simple_iv (loop, phi, name, &iv, true))
885 return NULL_TREE;
887 return (zero_p (iv.step) ? NULL_TREE : iv.step);
890 /* Returns true if EXP is an ssa name that occurs in an abnormal phi node. */
892 static bool
893 abnormal_ssa_name_p (tree exp)
895 if (!exp)
896 return false;
898 if (TREE_CODE (exp) != SSA_NAME)
899 return false;
901 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
904 /* Returns false if BASE or INDEX contains an ssa name that occurs in an
905 abnormal phi node. Callback for for_each_index. */
907 static bool
908 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
909 void *data ATTRIBUTE_UNUSED)
911 if (TREE_CODE (base) == ARRAY_REF)
913 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
914 return false;
915 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
916 return false;
919 return !abnormal_ssa_name_p (*index);
922 /* Returns true if EXPR contains an ssa name that occurs in an
923 abnormal phi node. */
925 static bool
926 contains_abnormal_ssa_name_p (tree expr)
928 enum tree_code code;
929 enum tree_code_class class;
931 if (!expr)
932 return false;
934 code = TREE_CODE (expr);
935 class = TREE_CODE_CLASS (code);
937 if (code == SSA_NAME)
938 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
940 if (code == INTEGER_CST
941 || is_gimple_min_invariant (expr))
942 return false;
944 if (code == ADDR_EXPR)
945 return !for_each_index (&TREE_OPERAND (expr, 0),
946 idx_contains_abnormal_ssa_name_p,
947 NULL);
949 switch (class)
951 case tcc_binary:
952 case tcc_comparison:
953 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
954 return true;
956 /* Fallthru. */
957 case tcc_unary:
958 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
959 return true;
961 break;
963 default:
964 gcc_unreachable ();
967 return false;
970 /* Finds basic ivs. */
972 static bool
973 find_bivs (struct ivopts_data *data)
975 tree phi, step, type, base;
976 bool found = false;
977 struct loop *loop = data->current_loop;
979 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
981 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
982 continue;
984 step = determine_biv_step (phi);
985 if (!step)
986 continue;
988 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
989 base = expand_simple_operations (base);
990 if (contains_abnormal_ssa_name_p (base)
991 || contains_abnormal_ssa_name_p (step))
992 continue;
994 type = TREE_TYPE (PHI_RESULT (phi));
995 base = fold_convert (type, base);
996 if (step)
997 step = fold_convert (type, step);
999 set_iv (data, PHI_RESULT (phi), base, step);
1000 found = true;
1003 return found;
1006 /* Marks basic ivs. */
1008 static void
1009 mark_bivs (struct ivopts_data *data)
1011 tree phi, var;
1012 struct iv *iv, *incr_iv;
1013 struct loop *loop = data->current_loop;
1014 basic_block incr_bb;
1016 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1018 iv = get_iv (data, PHI_RESULT (phi));
1019 if (!iv)
1020 continue;
1022 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1023 incr_iv = get_iv (data, var);
1024 if (!incr_iv)
1025 continue;
1027 /* If the increment is in a subloop, ignore it. */
1028 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1029 if (incr_bb->loop_father != data->current_loop
1030 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1031 continue;
1033 iv->biv_p = true;
1034 incr_iv->biv_p = true;
1038 /* Checks whether STMT defines a linear induction variable and stores its
1039 parameters to IV. */
1041 static bool
1042 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
1044 tree lhs;
1045 struct loop *loop = data->current_loop;
1047 iv->base = NULL_TREE;
1048 iv->step = NULL_TREE;
1050 if (TREE_CODE (stmt) != MODIFY_EXPR)
1051 return false;
1053 lhs = TREE_OPERAND (stmt, 0);
1054 if (TREE_CODE (lhs) != SSA_NAME)
1055 return false;
1057 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
1058 return false;
1059 iv->base = expand_simple_operations (iv->base);
1061 if (contains_abnormal_ssa_name_p (iv->base)
1062 || contains_abnormal_ssa_name_p (iv->step))
1063 return false;
1065 return true;
1068 /* Finds general ivs in statement STMT. */
1070 static void
1071 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1073 affine_iv iv;
1075 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1076 return;
1078 set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
1081 /* Finds general ivs in basic block BB. */
1083 static void
1084 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1086 block_stmt_iterator bsi;
1088 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1089 find_givs_in_stmt (data, bsi_stmt (bsi));
1092 /* Finds general ivs. */
1094 static void
1095 find_givs (struct ivopts_data *data)
1097 struct loop *loop = data->current_loop;
1098 basic_block *body = get_loop_body_in_dom_order (loop);
1099 unsigned i;
1101 for (i = 0; i < loop->num_nodes; i++)
1102 find_givs_in_bb (data, body[i]);
1103 free (body);
1106 /* For each ssa name defined in LOOP determines whether it is an induction
1107 variable and if so, its initial value and step. */
1109 static bool
1110 find_induction_variables (struct ivopts_data *data)
1112 unsigned i;
1113 bitmap_iterator bi;
1115 if (!find_bivs (data))
1116 return false;
1118 find_givs (data);
1119 mark_bivs (data);
1121 if (dump_file && (dump_flags & TDF_DETAILS))
1123 struct tree_niter_desc *niter;
1125 niter = niter_for_single_dom_exit (data);
1127 if (niter)
1129 fprintf (dump_file, " number of iterations ");
1130 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1131 fprintf (dump_file, "\n");
1133 fprintf (dump_file, " may be zero if ");
1134 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1135 fprintf (dump_file, "\n");
1136 fprintf (dump_file, "\n");
1139 fprintf (dump_file, "Induction variables:\n\n");
1141 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1143 if (ver_info (data, i)->iv)
1144 dump_iv (dump_file, ver_info (data, i)->iv);
1148 return true;
1151 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1153 static struct iv_use *
1154 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1155 tree stmt, enum use_type use_type)
1157 struct iv_use *use = XCNEW (struct iv_use);
1159 use->id = n_iv_uses (data);
1160 use->type = use_type;
1161 use->iv = iv;
1162 use->stmt = stmt;
1163 use->op_p = use_p;
1164 use->related_cands = BITMAP_ALLOC (NULL);
1166 /* Reset the ssa name, so that it is not shown in the dumps, if it
1167 was not reset by the caller. */
1168 iv->ssa_name = NULL_TREE;
1170 if (dump_file && (dump_flags & TDF_DETAILS))
1171 dump_use (dump_file, use);
1173 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1175 return use;
1178 /* Checks whether OP is a loop-level invariant and if so, records it.
1179 NONLINEAR_USE is true if the invariant is used in a way we do not
1180 handle specially. */
1182 static void
1183 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1185 basic_block bb;
1186 struct version_info *info;
1188 if (TREE_CODE (op) != SSA_NAME
1189 || !is_gimple_reg (op))
1190 return;
1192 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1193 if (bb
1194 && flow_bb_inside_loop_p (data->current_loop, bb))
1195 return;
1197 info = name_info (data, op);
1198 info->name = op;
1199 info->has_nonlin_use |= nonlinear_use;
1200 if (!info->inv_id)
1201 info->inv_id = ++data->max_inv_id;
1202 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1205 /* Checks whether the use OP is interesting and if so, records it. */
1207 static struct iv_use *
1208 find_interesting_uses_op (struct ivopts_data *data, tree op)
1210 struct iv *iv;
1211 struct iv *civ;
1212 tree stmt;
1213 struct iv_use *use;
1215 if (TREE_CODE (op) != SSA_NAME)
1216 return NULL;
1218 iv = get_iv (data, op);
1219 if (!iv)
1220 return NULL;
1222 if (iv->have_use_for)
1224 use = iv_use (data, iv->use_id);
1226 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1227 return use;
1230 if (zero_p (iv->step))
1232 record_invariant (data, op, true);
1233 return NULL;
1235 iv->have_use_for = true;
1237 civ = XNEW (struct iv);
1238 *civ = *iv;
1240 stmt = SSA_NAME_DEF_STMT (op);
1241 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1242 || TREE_CODE (stmt) == MODIFY_EXPR);
1244 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1245 iv->use_id = use->id;
1247 return use;
1250 /* Checks whether the condition *COND_P in STMT is interesting
1251 and if so, records it. */
1253 static void
1254 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1256 tree *op0_p;
1257 tree *op1_p;
1258 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1259 struct iv const_iv;
1260 tree zero = integer_zero_node;
1262 const_iv.step = NULL_TREE;
1264 if (TREE_CODE (*cond_p) != SSA_NAME
1265 && !COMPARISON_CLASS_P (*cond_p))
1266 return;
1268 if (TREE_CODE (*cond_p) == SSA_NAME)
1270 op0_p = cond_p;
1271 op1_p = &zero;
1273 else
1275 op0_p = &TREE_OPERAND (*cond_p, 0);
1276 op1_p = &TREE_OPERAND (*cond_p, 1);
1279 if (TREE_CODE (*op0_p) == SSA_NAME)
1280 iv0 = get_iv (data, *op0_p);
1281 else
1282 iv0 = &const_iv;
1284 if (TREE_CODE (*op1_p) == SSA_NAME)
1285 iv1 = get_iv (data, *op1_p);
1286 else
1287 iv1 = &const_iv;
1289 if (/* When comparing with a non-invariant value, we cannot do any sensible
1290 induction variable elimination. */
1291 (!iv0 || !iv1)
1292 /* Eliminating condition based on two ivs would be nontrivial.
1293 ??? TODO -- it is not really important to handle this case. */
1294 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1296 find_interesting_uses_op (data, *op0_p);
1297 find_interesting_uses_op (data, *op1_p);
1298 return;
1301 if (zero_p (iv0->step) && zero_p (iv1->step))
1303 /* If both are invariants, this is a work for unswitching. */
1304 return;
1307 civ = XNEW (struct iv);
1308 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1309 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1312 /* Returns true if expression EXPR is obviously invariant in LOOP,
1313 i.e. if all its operands are defined outside of the LOOP. */
1315 bool
1316 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1318 basic_block def_bb;
1319 unsigned i, len;
1321 if (is_gimple_min_invariant (expr))
1322 return true;
1324 if (TREE_CODE (expr) == SSA_NAME)
1326 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1327 if (def_bb
1328 && flow_bb_inside_loop_p (loop, def_bb))
1329 return false;
1331 return true;
1334 if (!EXPR_P (expr))
1335 return false;
1337 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1338 for (i = 0; i < len; i++)
1339 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1340 return false;
1342 return true;
1345 /* Accumulates the steps of indices into DATA and replaces their values with the
1346 initial ones. Returns false when the value of the index cannot be determined.
1347 Callback for for_each_index. */
1349 struct ifs_ivopts_data
1351 struct ivopts_data *ivopts_data;
1352 tree stmt;
1353 tree *step_p;
1356 static bool
1357 idx_find_step (tree base, tree *idx, void *data)
1359 struct ifs_ivopts_data *dta = data;
1360 struct iv *iv;
1361 tree step, iv_step, lbound, off;
1362 struct loop *loop = dta->ivopts_data->current_loop;
1364 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1365 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1366 return false;
1368 /* If base is a component ref, require that the offset of the reference
1369 be invariant. */
1370 if (TREE_CODE (base) == COMPONENT_REF)
1372 off = component_ref_field_offset (base);
1373 return expr_invariant_in_loop_p (loop, off);
1376 /* If base is an array, first check whether we will be able to move the
1377 reference out of the loop (in order to take its address in strength
1378 reduction). In order for this to work we need both lower bound
1379 and step to be loop invariants. */
1380 if (TREE_CODE (base) == ARRAY_REF)
1382 step = array_ref_element_size (base);
1383 lbound = array_ref_low_bound (base);
1385 if (!expr_invariant_in_loop_p (loop, step)
1386 || !expr_invariant_in_loop_p (loop, lbound))
1387 return false;
1390 if (TREE_CODE (*idx) != SSA_NAME)
1391 return true;
1393 iv = get_iv (dta->ivopts_data, *idx);
1394 if (!iv)
1395 return false;
1397 *idx = iv->base;
1399 if (!iv->step)
1400 return true;
1402 if (TREE_CODE (base) == ARRAY_REF)
1404 step = array_ref_element_size (base);
1406 /* We only handle addresses whose step is an integer constant. */
1407 if (TREE_CODE (step) != INTEGER_CST)
1408 return false;
1410 else
1411 /* The step for pointer arithmetic is already 1 byte. */
1412 step = build_int_cst (sizetype, 1);
1414 /* FIXME: convert_step should not be used outside chrec_convert: fix
1415 this by calling chrec_convert. */
1416 iv_step = convert_step (dta->ivopts_data->current_loop,
1417 sizetype, iv->base, iv->step, dta->stmt);
1419 if (!iv_step)
1421 /* The index might wrap. */
1422 return false;
1425 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1427 if (!*dta->step_p)
1428 *dta->step_p = step;
1429 else
1430 *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1432 return true;
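/* A sketch of the effect (hypothetical input): for a reference a[i]
   where i is an iv with base 0 and step 1 and the array elements are
   4 bytes wide, *IDX is replaced by 0 and 4 * 1 = 4 is added to
   *DTA->step_p, i.e. the reference is treated as a[0] whose address
   advances by 4 bytes per iteration.  */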
1435 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1436 object is passed to it in DATA. */
1438 static bool
1439 idx_record_use (tree base, tree *idx,
1440 void *data)
1442 find_interesting_uses_op (data, *idx);
1443 if (TREE_CODE (base) == ARRAY_REF)
1445 find_interesting_uses_op (data, array_ref_element_size (base));
1446 find_interesting_uses_op (data, array_ref_low_bound (base));
1448 return true;
1451 /* Returns true if memory reference REF may be unaligned. */
1453 static bool
1454 may_be_unaligned_p (tree ref)
1456 tree base;
1457 tree base_type;
1458 HOST_WIDE_INT bitsize;
1459 HOST_WIDE_INT bitpos;
1460 tree toffset;
1461 enum machine_mode mode;
1462 int unsignedp, volatilep;
1463 unsigned base_align;
1465 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1466 thus they are not misaligned. */
1467 if (TREE_CODE (ref) == TARGET_MEM_REF)
1468 return false;
1470 /* The test below is basically a copy of what expr.c:normal_inner_ref
1471 does to check whether the object must be loaded by parts when
1472 STRICT_ALIGNMENT is true. */
1473 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1474 &unsignedp, &volatilep, true);
1475 base_type = TREE_TYPE (base);
1476 base_align = TYPE_ALIGN (base_type);
1478 if (mode != BLKmode
1479 && (base_align < GET_MODE_ALIGNMENT (mode)
1480 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1481 || bitpos % BITS_PER_UNIT != 0))
1482 return true;
1484 return false;
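/* For instance (an illustrative case on a strict-alignment target):
   an int field at byte offset 1 in a packed structure yields
   bitpos == 8, and 8 % GET_MODE_ALIGNMENT (SImode) != 0 when SImode
   requires 32-bit alignment, so the reference is reported as possibly
   unaligned.  */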
1487 /* Finds addresses in *OP_P inside STMT. */
1489 static void
1490 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1492 tree base = *op_p, step = NULL;
1493 struct iv *civ;
1494 struct ifs_ivopts_data ifs_ivopts_data;
1496 /* Do not play with volatile memory references. A bit too conservative,
1497 perhaps, but safe. */
1498 if (stmt_ann (stmt)->has_volatile_ops)
1499 goto fail;
1501 /* Ignore bitfields for now. Not really something terribly complicated
1502 to handle. TODO. */
1503 if (TREE_CODE (base) == COMPONENT_REF
1504 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1505 goto fail;
1507 if (STRICT_ALIGNMENT
1508 && may_be_unaligned_p (base))
1509 goto fail;
1511 base = unshare_expr (base);
1513 if (TREE_CODE (base) == TARGET_MEM_REF)
1515 tree type = build_pointer_type (TREE_TYPE (base));
1516 tree astep;
1518 if (TMR_BASE (base)
1519 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1521 civ = get_iv (data, TMR_BASE (base));
1522 if (!civ)
1523 goto fail;
1525 TMR_BASE (base) = civ->base;
1526 step = civ->step;
1528 if (TMR_INDEX (base)
1529 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1531 civ = get_iv (data, TMR_INDEX (base));
1532 if (!civ)
1533 goto fail;
1535 TMR_INDEX (base) = civ->base;
1536 astep = civ->step;
1538 if (astep)
1540 if (TMR_STEP (base))
1541 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1543 if (step)
1544 step = fold_build2 (PLUS_EXPR, type, step, astep);
1545 else
1546 step = astep;
1550 if (zero_p (step))
1551 goto fail;
1552 base = tree_mem_ref_addr (type, base);
1554 else
1556 ifs_ivopts_data.ivopts_data = data;
1557 ifs_ivopts_data.stmt = stmt;
1558 ifs_ivopts_data.step_p = &step;
1559 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1560 || zero_p (step))
1561 goto fail;
1563 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1564 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1566 base = build_fold_addr_expr (base);
1569 civ = alloc_iv (base, step);
1570 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1571 return;
1573 fail:
1574 for_each_index (op_p, idx_record_use, data);
1577 /* Finds and records invariants used in STMT. */
1579 static void
1580 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1582 ssa_op_iter iter;
1583 use_operand_p use_p;
1584 tree op;
1586 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1588 op = USE_FROM_PTR (use_p);
1589 record_invariant (data, op, false);
1593 /* Finds interesting uses of induction variables in the statement STMT. */
1595 static void
1596 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1598 struct iv *iv;
1599 tree op, lhs, rhs;
1600 ssa_op_iter iter;
1601 use_operand_p use_p;
1603 find_invariants_stmt (data, stmt);
1605 if (TREE_CODE (stmt) == COND_EXPR)
1607 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1608 return;
1611 if (TREE_CODE (stmt) == MODIFY_EXPR)
1613 lhs = TREE_OPERAND (stmt, 0);
1614 rhs = TREE_OPERAND (stmt, 1);
1616 if (TREE_CODE (lhs) == SSA_NAME)
1618 /* If the statement defines an induction variable, the uses are not
1619 interesting by themselves. */
1621 iv = get_iv (data, lhs);
1623 if (iv && !zero_p (iv->step))
1624 return;
1627 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1629 case tcc_comparison:
1630 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1631 return;
1633 case tcc_reference:
1634 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1635 if (REFERENCE_CLASS_P (lhs))
1636 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1637 return;
1639 default: ;
1642 if (REFERENCE_CLASS_P (lhs)
1643 && is_gimple_val (rhs))
1645 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1646 find_interesting_uses_op (data, rhs);
1647 return;
1650 /* TODO -- we should also handle address uses of type
1652 memory = call (whatever);
1656 call (memory). */
1659 if (TREE_CODE (stmt) == PHI_NODE
1660 && bb_for_stmt (stmt) == data->current_loop->header)
1662 lhs = PHI_RESULT (stmt);
1663 iv = get_iv (data, lhs);
1665 if (iv && !zero_p (iv->step))
1666 return;
1669 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1671 op = USE_FROM_PTR (use_p);
1673 if (TREE_CODE (op) != SSA_NAME)
1674 continue;
1676 iv = get_iv (data, op);
1677 if (!iv)
1678 continue;
1680 find_interesting_uses_op (data, op);
1684 /* Finds interesting uses of induction variables outside of loops
1685 on loop exit edge EXIT. */
1687 static void
1688 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1690 tree phi, def;
1692 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1694 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1695 find_interesting_uses_op (data, def);
1699 /* Finds uses of the induction variables that are interesting. */
1701 static void
1702 find_interesting_uses (struct ivopts_data *data)
1704 basic_block bb;
1705 block_stmt_iterator bsi;
1706 tree phi;
1707 basic_block *body = get_loop_body (data->current_loop);
1708 unsigned i;
1709 struct version_info *info;
1710 edge e;
1712 if (dump_file && (dump_flags & TDF_DETAILS))
1713 fprintf (dump_file, "Uses:\n\n");
1715 for (i = 0; i < data->current_loop->num_nodes; i++)
1717 edge_iterator ei;
1718 bb = body[i];
1720 FOR_EACH_EDGE (e, ei, bb->succs)
1721 if (e->dest != EXIT_BLOCK_PTR
1722 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1723 find_interesting_uses_outside (data, e);
1725 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1726 find_interesting_uses_stmt (data, phi);
1727 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1728 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1731 if (dump_file && (dump_flags & TDF_DETAILS))
1733 bitmap_iterator bi;
1735 fprintf (dump_file, "\n");
1737 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1739 info = ver_info (data, i);
1740 if (info->inv_id)
1742 fprintf (dump_file, " ");
1743 print_generic_expr (dump_file, info->name, TDF_SLIM);
1744 fprintf (dump_file, " is invariant (%d)%s\n",
1745 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1749 fprintf (dump_file, "\n");
1752 free (body);
1755 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1756 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1757 we are at the top level of the processed address. */
1759 static tree
1760 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1761 unsigned HOST_WIDE_INT *offset)
1763 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1764 enum tree_code code;
1765 tree type, orig_type = TREE_TYPE (expr);
1766 unsigned HOST_WIDE_INT off0, off1, st;
1767 tree orig_expr = expr;
1769 STRIP_NOPS (expr);
1771 type = TREE_TYPE (expr);
1772 code = TREE_CODE (expr);
1773 *offset = 0;
1775 switch (code)
1777 case INTEGER_CST:
1778 if (!cst_and_fits_in_hwi (expr)
1779 || zero_p (expr))
1780 return orig_expr;
1782 *offset = int_cst_value (expr);
1783 return build_int_cst_type (orig_type, 0);
1785 case PLUS_EXPR:
1786 case MINUS_EXPR:
1787 op0 = TREE_OPERAND (expr, 0);
1788 op1 = TREE_OPERAND (expr, 1);
1790 op0 = strip_offset_1 (op0, false, false, &off0);
1791 op1 = strip_offset_1 (op1, false, false, &off1);
1793 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1794 if (op0 == TREE_OPERAND (expr, 0)
1795 && op1 == TREE_OPERAND (expr, 1))
1796 return orig_expr;
1798 if (zero_p (op1))
1799 expr = op0;
1800 else if (zero_p (op0))
1802 if (code == PLUS_EXPR)
1803 expr = op1;
1804 else
1805 expr = fold_build1 (NEGATE_EXPR, type, op1);
1807 else
1808 expr = fold_build2 (code, type, op0, op1);
1810 return fold_convert (orig_type, expr);
1812 case ARRAY_REF:
1813 if (!inside_addr)
1814 return orig_expr;
1816 step = array_ref_element_size (expr);
1817 if (!cst_and_fits_in_hwi (step))
1818 break;
1820 st = int_cst_value (step);
1821 op1 = TREE_OPERAND (expr, 1);
1822 op1 = strip_offset_1 (op1, false, false, &off1);
1823 *offset = off1 * st;
1825 if (top_compref
1826 && zero_p (op1))
1828 /* Strip the array reference completely. */
1829 op0 = TREE_OPERAND (expr, 0);
1830 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1831 *offset += off0;
1832 return op0;
1834 break;
1836 case COMPONENT_REF:
1837 if (!inside_addr)
1838 return orig_expr;
1840 tmp = component_ref_field_offset (expr);
1841 if (top_compref
1842 && cst_and_fits_in_hwi (tmp))
1844 /* Strip the component reference completely. */
1845 op0 = TREE_OPERAND (expr, 0);
1846 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1847 *offset = off0 + int_cst_value (tmp);
1848 return op0;
1850 break;
1852 case ADDR_EXPR:
1853 op0 = TREE_OPERAND (expr, 0);
1854 op0 = strip_offset_1 (op0, true, true, &off0);
1855 *offset += off0;
1857 if (op0 == TREE_OPERAND (expr, 0))
1858 return orig_expr;
1860 expr = build_fold_addr_expr (op0);
1861 return fold_convert (orig_type, expr);
1863 case INDIRECT_REF:
1864 inside_addr = false;
1865 break;
1867 default:
1868 return orig_expr;
1871 /* Default handling of expressions for which we want to recurse into
1872 the first operand. */
1873 op0 = TREE_OPERAND (expr, 0);
1874 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1875 *offset += off0;
1877 if (op0 == TREE_OPERAND (expr, 0)
1878 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1879 return orig_expr;
1881 expr = copy_node (expr);
1882 TREE_OPERAND (expr, 0) = op0;
1883 if (op1)
1884 TREE_OPERAND (expr, 1) = op1;
1886 /* Inside an address, we might strip top-level component references,
1887 thus changing the type of the expression. Handling of ADDR_EXPR
1888 will fix that. */
1889 expr = fold_convert (orig_type, expr);
1891 return expr;
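/* A worked example (hypothetical input): for EXPR == &a[3] with 4-byte
   array elements, the ARRAY_REF case computes st = 4 and off1 = 3, the
   index becomes zero so the reference is stripped down to a, and the
   ADDR_EXPR case then returns &a with *OFFSET == 12.  */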
1894 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1896 static tree
1897 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1899 return strip_offset_1 (expr, false, false, offset);
1902 /* Returns a variant of TYPE that can be used as a base for different uses.
1903 For integer types, we return the unsigned variant of the type, which
1904 avoids problems with overflows. For pointer types, we return void *. */
1906 static tree
1907 generic_type_for (tree type)
1909 if (POINTER_TYPE_P (type))
1910 return ptr_type_node;
1912 if (TYPE_UNSIGNED (type))
1913 return type;
1915 return unsigned_type_for (type);
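/* E.g. generic_type_for (int) is unsigned int and generic_type_for of
   any pointer type is void *; unsigned types are returned unchanged.  */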
1918 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1919 the bitmap in which we should store them. */
1921 static struct ivopts_data *fd_ivopts_data;
1922 static tree
1923 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1925 bitmap *depends_on = data;
1926 struct version_info *info;
1928 if (TREE_CODE (*expr_p) != SSA_NAME)
1929 return NULL_TREE;
1930 info = name_info (fd_ivopts_data, *expr_p);
1932 if (!info->inv_id || info->has_nonlin_use)
1933 return NULL_TREE;
1935 if (!*depends_on)
1936 *depends_on = BITMAP_ALLOC (NULL);
1937 bitmap_set_bit (*depends_on, info->inv_id);
1939 return NULL_TREE;
1942 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1943 position to POS. If USE is not NULL, the candidate is set as related to
1944 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1945 replacement of the final value of the iv by a direct computation. */
1947 static struct iv_cand *
1948 add_candidate_1 (struct ivopts_data *data,
1949 tree base, tree step, bool important, enum iv_position pos,
1950 struct iv_use *use, tree incremented_at)
1952 unsigned i;
1953 struct iv_cand *cand = NULL;
1954 tree type, orig_type;
1956 if (base)
1958 orig_type = TREE_TYPE (base);
1959 type = generic_type_for (orig_type);
1960 if (type != orig_type)
1962 base = fold_convert (type, base);
1963 if (step)
1964 step = fold_convert (type, step);
1968 for (i = 0; i < n_iv_cands (data); i++)
1970 cand = iv_cand (data, i);
1972 if (cand->pos != pos)
1973 continue;
1975 if (cand->incremented_at != incremented_at)
1976 continue;
1978 if (!cand->iv)
1980 if (!base && !step)
1981 break;
1983 continue;
1986 if (!base && !step)
1987 continue;
1989 if (!operand_equal_p (base, cand->iv->base, 0))
1990 continue;
1992 if (zero_p (cand->iv->step))
1994 if (zero_p (step))
1995 break;
1997 else
1999 if (step && operand_equal_p (step, cand->iv->step, 0))
2000 break;
2004 if (i == n_iv_cands (data))
2006 cand = XCNEW (struct iv_cand);
2007 cand->id = i;
2009 if (!base && !step)
2010 cand->iv = NULL;
2011 else
2012 cand->iv = alloc_iv (base, step);
2014 cand->pos = pos;
2015 if (pos != IP_ORIGINAL && cand->iv)
2017 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2018 cand->var_after = cand->var_before;
2020 cand->important = important;
2021 cand->incremented_at = incremented_at;
2022 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2024 if (step
2025 && TREE_CODE (step) != INTEGER_CST)
2027 fd_ivopts_data = data;
2028 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2031 if (dump_file && (dump_flags & TDF_DETAILS))
2032 dump_cand (dump_file, cand);
2035 if (important && !cand->important)
2037 cand->important = true;
2038 if (dump_file && (dump_flags & TDF_DETAILS))
2039 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2042 if (use)
2044 bitmap_set_bit (use->related_cands, i);
2045 if (dump_file && (dump_flags & TDF_DETAILS))
2046 fprintf (dump_file, "Candidate %d is related to use %d\n",
2047 cand->id, use->id);
2050 return cand;
2053 /* Returns true if incrementing the induction variable at the end of the LOOP
2054 is allowed.
2056 The purpose is to avoid splitting the latch edge with a biv increment, thus
2057 creating a jump, possibly confusing other optimization passes and leaving
2058 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2059 is not available (so we do not have a better alternative), or if the latch
2060 edge is already nonempty. */
2062 static bool
2063 allow_ip_end_pos_p (struct loop *loop)
2065 if (!ip_normal_pos (loop))
2066 return true;
2068 if (!empty_block_p (ip_end_pos (loop)))
2069 return true;
2071 return false;
2074 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2075 position to POS. If USE is not NULL, the candidate is set as related to
2076 it. The candidate computation is scheduled on all available positions. */
2078 static void
2079 add_candidate (struct ivopts_data *data,
2080 tree base, tree step, bool important, struct iv_use *use)
2082 if (ip_normal_pos (data->current_loop))
2083 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2084 if (ip_end_pos (data->current_loop)
2085 && allow_ip_end_pos_p (data->current_loop))
2086 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2089 /* Add a standard "0 + 1 * iteration" iv candidate for a
2090 type with SIZE bits. */
2092 static void
2093 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2094 unsigned int size)
2096 tree type = lang_hooks.types.type_for_size (size, true);
2097 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2098 true, NULL);
2101 /* Adds standard iv candidates. */
2103 static void
2104 add_standard_iv_candidates (struct ivopts_data *data)
2106 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2108 /* The same for a double-integer type if it is still fast enough. */
2109 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2110 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2114 /* Adds candidates based on the old induction variable IV. */
2116 static void
2117 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2119 tree phi, def;
2120 struct iv_cand *cand;
2122 add_candidate (data, iv->base, iv->step, true, NULL);
2124 /* The same, but with initial value zero. */
2125 add_candidate (data,
2126 build_int_cst (TREE_TYPE (iv->base), 0),
2127 iv->step, true, NULL);
2129 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2130 if (TREE_CODE (phi) == PHI_NODE)
2132 /* Additionally record the possibility of leaving the original iv
2133 untouched. */
2134 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2135 cand = add_candidate_1 (data,
2136 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2137 SSA_NAME_DEF_STMT (def));
2138 cand->var_before = iv->ssa_name;
2139 cand->var_after = def;
2143 /* Adds candidates based on the old induction variables. */
2145 static void
2146 add_old_ivs_candidates (struct ivopts_data *data)
2148 unsigned i;
2149 struct iv *iv;
2150 bitmap_iterator bi;
2152 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2154 iv = ver_info (data, i)->iv;
2155 if (iv && iv->biv_p && !zero_p (iv->step))
2156 add_old_iv_candidates (data, iv);
2160 /* Adds candidates based on the value of the induction variable IV and USE. */
2162 static void
2163 add_iv_value_candidates (struct ivopts_data *data,
2164 struct iv *iv, struct iv_use *use)
2166 unsigned HOST_WIDE_INT offset;
2167 tree base;
2169 add_candidate (data, iv->base, iv->step, false, use);
2171 /* The same, but with initial value zero. Make such a variable important,
2172 since it is generic enough that possibly many uses may be based
2173 on it. */
2174 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2175 iv->step, true, use);
2177 /* Third, try removing the constant offset. */
2178 base = strip_offset (iv->base, &offset);
2179 if (offset)
2180 add_candidate (data, base, iv->step, false, use);
2183 /* Adds candidates based on the uses. */
2185 static void
2186 add_derived_ivs_candidates (struct ivopts_data *data)
2188 unsigned i;
2190 for (i = 0; i < n_iv_uses (data); i++)
2192 struct iv_use *use = iv_use (data, i);
2194 if (!use)
2195 continue;
2197 switch (use->type)
2199 case USE_NONLINEAR_EXPR:
2200 case USE_COMPARE:
2201 case USE_ADDRESS:
2202 /* Just add the ivs based on the value of the iv used here. */
2203 add_iv_value_candidates (data, use->iv, use);
2204 break;
2206 default:
2207 gcc_unreachable ();
2212 /* Record important candidates and add them to related_cands bitmaps
2213 if needed. */
2215 static void
2216 record_important_candidates (struct ivopts_data *data)
2218 unsigned i;
2219 struct iv_use *use;
2221 for (i = 0; i < n_iv_cands (data); i++)
2223 struct iv_cand *cand = iv_cand (data, i);
2225 if (cand->important)
2226 bitmap_set_bit (data->important_candidates, i);
2229 data->consider_all_candidates = (n_iv_cands (data)
2230 <= CONSIDER_ALL_CANDIDATES_BOUND);
2232 if (data->consider_all_candidates)
2234 /* We will not need "related_cands" bitmaps in this case,
2235 so release them to decrease peak memory consumption. */
2236 for (i = 0; i < n_iv_uses (data); i++)
2238 use = iv_use (data, i);
2239 BITMAP_FREE (use->related_cands);
2242 else
2244 /* Add important candidates to the related_cands bitmaps. */
2245 for (i = 0; i < n_iv_uses (data); i++)
2246 bitmap_ior_into (iv_use (data, i)->related_cands,
2247 data->important_candidates);
2251 /* Finds the candidates for the induction variables. */
2253 static void
2254 find_iv_candidates (struct ivopts_data *data)
2256 /* Add commonly used ivs. */
2257 add_standard_iv_candidates (data);
2259 /* Add old induction variables. */
2260 add_old_ivs_candidates (data);
2262 /* Add induction variables derived from uses. */
2263 add_derived_ivs_candidates (data);
2265 /* Record the important candidates. */
2266 record_important_candidates (data);
2269 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2270 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2271 we allocate a simple list for every use. */
2273 static void
2274 alloc_use_cost_map (struct ivopts_data *data)
2276 unsigned i, size, s, j;
2278 for (i = 0; i < n_iv_uses (data); i++)
2280 struct iv_use *use = iv_use (data, i);
2281 bitmap_iterator bi;
2283 if (data->consider_all_candidates)
2284 size = n_iv_cands (data);
2285 else
2287 s = 0;
2288 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2290 s++;
2293 /* Round up to a power of two, so that computing the modulo by it is fast. */
2294 for (size = 1; size < s; size <<= 1)
2295 continue;
2298 use->n_map_members = size;
2299 use->cost_map = XCNEWVEC (struct cost_pair, size);
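/* Illustrative sketch, not part of GCC: the size computation above rounds
   the table size up to a power of two so that an index can later be reduced
   with a bit mask instead of a division. The function names here are
   hypothetical. */

#include <assert.h>

static unsigned
example_round_up_pow2 (unsigned s)
{
  unsigned size;

  /* Smallest power of two >= S; mirrors the loop in alloc_use_cost_map. */
  for (size = 1; size < s; size <<= 1)
    continue;
  return size;
}

/* With SIZE a power of two, ID % SIZE equals ID & (SIZE - 1). */
static unsigned
example_mod_pow2 (unsigned id, unsigned size)
{
  assert ((size & (size - 1)) == 0);
  return id & (size - 1);
}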
2303 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2304 depends on the invariants DEPENDS_ON and that the value used in expressing
2305 it is VALUE. */
2307 static void
2308 set_use_iv_cost (struct ivopts_data *data,
2309 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2310 bitmap depends_on, tree value)
2312 unsigned i, s;
2314 if (cost == INFTY)
2316 BITMAP_FREE (depends_on);
2317 return;
2320 if (data->consider_all_candidates)
2322 use->cost_map[cand->id].cand = cand;
2323 use->cost_map[cand->id].cost = cost;
2324 use->cost_map[cand->id].depends_on = depends_on;
2325 use->cost_map[cand->id].value = value;
2326 return;
2329 /* n_map_members is a power of two, so this computes modulo. */
2330 s = cand->id & (use->n_map_members - 1);
2331 for (i = s; i < use->n_map_members; i++)
2332 if (!use->cost_map[i].cand)
2333 goto found;
2334 for (i = 0; i < s; i++)
2335 if (!use->cost_map[i].cand)
2336 goto found;
2338 gcc_unreachable ();
2340 found:
2341 use->cost_map[i].cand = cand;
2342 use->cost_map[i].cost = cost;
2343 use->cost_map[i].depends_on = depends_on;
2344 use->cost_map[i].value = value;
2347 /* Gets cost of (USE, CANDIDATE) pair. */
2349 static struct cost_pair *
2350 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2351 struct iv_cand *cand)
2353 unsigned i, s;
2354 struct cost_pair *ret;
2356 if (!cand)
2357 return NULL;
2359 if (data->consider_all_candidates)
2361 ret = use->cost_map + cand->id;
2362 if (!ret->cand)
2363 return NULL;
2365 return ret;
2368 /* n_map_members is a power of two, so this computes modulo. */
2369 s = cand->id & (use->n_map_members - 1);
2370 for (i = s; i < use->n_map_members; i++)
2371 if (use->cost_map[i].cand == cand)
2372 return use->cost_map + i;
2374 for (i = 0; i < s; i++)
2375 if (use->cost_map[i].cand == cand)
2376 return use->cost_map + i;
2378 return NULL;
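/* Illustrative sketch, not part of GCC: the open-addressing scheme that
   set_use_iv_cost and get_use_iv_cost implement above -- probe linearly
   from the hashed slot to the end of the table, then wrap around to the
   beginning. A hypothetical integer-keyed table stands in for the
   cost_pair array; a zero slot plays the role of a NULL cand. */

#include <stddef.h>

static int *
example_probe (int *table, unsigned n, int key)
{
  unsigned i, s = (unsigned) key & (n - 1);  /* N is a power of two. */

  for (i = s; i < n; i++)
    if (table[i] == key)
      return &table[i];
  for (i = 0; i < s; i++)
    if (table[i] == key)
      return &table[i];
  return NULL;
}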
2381 /* Returns an estimate of the cost of computing SEQ. */
2383 static unsigned
2384 seq_cost (rtx seq)
2386 unsigned cost = 0;
2387 rtx set;
2389 for (; seq; seq = NEXT_INSN (seq))
2391 set = single_set (seq);
2392 if (set)
2393 cost += rtx_cost (set, SET);
2394 else
2395 cost++;
2398 return cost;
2401 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2402 static rtx
2403 produce_memory_decl_rtl (tree obj, int *regno)
2405 rtx x;
2407 gcc_assert (obj);
2408 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2410 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2411 x = gen_rtx_SYMBOL_REF (Pmode, name);
2413 else
2414 x = gen_raw_REG (Pmode, (*regno)++);
2416 return gen_rtx_MEM (DECL_MODE (obj), x);
2419 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2420 walk_tree. DATA contains the current fake register number. */
2422 static tree
2423 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2425 tree obj = NULL_TREE;
2426 rtx x = NULL_RTX;
2427 int *regno = data;
2429 switch (TREE_CODE (*expr_p))
2431 case ADDR_EXPR:
2432 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2433 handled_component_p (*expr_p);
2434 expr_p = &TREE_OPERAND (*expr_p, 0))
2435 continue;
2436 obj = *expr_p;
2437 if (DECL_P (obj))
2438 x = produce_memory_decl_rtl (obj, regno);
2439 break;
2441 case SSA_NAME:
2442 *ws = 0;
2443 obj = SSA_NAME_VAR (*expr_p);
2444 if (!DECL_RTL_SET_P (obj))
2445 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2446 break;
2448 case VAR_DECL:
2449 case PARM_DECL:
2450 case RESULT_DECL:
2451 *ws = 0;
2452 obj = *expr_p;
2454 if (DECL_RTL_SET_P (obj))
2455 break;
2457 if (DECL_MODE (obj) == BLKmode)
2458 x = produce_memory_decl_rtl (obj, regno);
2459 else
2460 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2462 break;
2464 default:
2465 break;
2468 if (x)
2470 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2471 SET_DECL_RTL (obj, x);
2474 return NULL_TREE;
2477 /* Determines cost of the computation of EXPR. */
2479 static unsigned
2480 computation_cost (tree expr)
2482 rtx seq, rslt;
2483 tree type = TREE_TYPE (expr);
2484 unsigned cost;
2485 /* Avoid using hard regs in ways which may be unsupported. */
2486 int regno = LAST_VIRTUAL_REGISTER + 1;
2488 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2489 start_sequence ();
2490 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2491 seq = get_insns ();
2492 end_sequence ();
2494 cost = seq_cost (seq);
2495 if (MEM_P (rslt))
2496 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2498 return cost;
2501 /* Returns variable containing the value of candidate CAND at statement AT. */
2503 static tree
2504 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2506 if (stmt_after_increment (loop, cand, stmt))
2507 return cand->var_after;
2508 else
2509 return cand->var_before;
2512 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2513 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2515 int
2516 tree_int_cst_sign_bit (tree t)
2518 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2519 unsigned HOST_WIDE_INT w;
2521 if (bitno < HOST_BITS_PER_WIDE_INT)
2522 w = TREE_INT_CST_LOW (t);
2523 else
2525 w = TREE_INT_CST_HIGH (t);
2526 bitno -= HOST_BITS_PER_WIDE_INT;
2529 return (w >> bitno) & 1;
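/* Illustrative sketch, not part of GCC: the same sign-bit extraction on a
   value stored in two 64-bit halves, where the type precision PREC (>= 1),
   not the storage width, decides which bit is the sign bit. */

#include <stdint.h>

#define EX_BITS_PER_WORD 64

static int
example_sign_bit (uint64_t low, uint64_t high, unsigned prec)
{
  unsigned bitno = prec - 1;
  uint64_t w;

  if (bitno < EX_BITS_PER_WORD)
    w = low;
  else
    {
      w = high;
      bitno -= EX_BITS_PER_WORD;
    }
  return (w >> bitno) & 1;
}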
2532 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2533 return cst. Otherwise return NULL_TREE. */
2535 static tree
2536 constant_multiple_of (tree type, tree top, tree bot)
2538 tree res, mby, p0, p1;
2539 enum tree_code code;
2540 bool negate;
2542 STRIP_NOPS (top);
2543 STRIP_NOPS (bot);
2545 if (operand_equal_p (top, bot, 0))
2546 return build_int_cst (type, 1);
2548 code = TREE_CODE (top);
2549 switch (code)
2551 case MULT_EXPR:
2552 mby = TREE_OPERAND (top, 1);
2553 if (TREE_CODE (mby) != INTEGER_CST)
2554 return NULL_TREE;
2556 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2557 if (!res)
2558 return NULL_TREE;
2560 return fold_binary_to_constant (MULT_EXPR, type, res,
2561 fold_convert (type, mby));
2563 case PLUS_EXPR:
2564 case MINUS_EXPR:
2565 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2566 if (!p0)
2567 return NULL_TREE;
2568 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2569 if (!p1)
2570 return NULL_TREE;
2572 return fold_binary_to_constant (code, type, p0, p1);
2574 case INTEGER_CST:
2575 if (TREE_CODE (bot) != INTEGER_CST)
2576 return NULL_TREE;
2578 bot = fold_convert (type, bot);
2579 top = fold_convert (type, top);
2581 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2582 the result afterwards. */
2583 if (tree_int_cst_sign_bit (bot))
2585 negate = true;
2586 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2588 else
2589 negate = false;
2591 /* Ditto for TOP. */
2592 if (tree_int_cst_sign_bit (top))
2594 negate = !negate;
2595 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2598 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2599 return NULL_TREE;
2601 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2602 if (negate)
2603 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2604 return res;
2606 default:
2607 return NULL_TREE;
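/* Illustrative sketch, not part of GCC: the INTEGER_CST leg of
   constant_multiple_of restated on plain integers -- divide the absolute
   values and fix up the sign afterwards, failing unless the division is
   exact. The function name is hypothetical. */

#include <stdbool.h>

static bool
example_constant_multiple_of (long top, long bot, long *cst)
{
  bool negate = false;

  if (bot == 0)
    return false;
  if (bot < 0)
    {
      negate = true;
      bot = -bot;
    }
  if (top < 0)
    {
      negate = !negate;
      top = -top;
    }
  if (top % bot != 0)
    return false;

  *cst = negate ? -(top / bot) : top / bot;
  return true;
}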
2611 /* Sets COMB to CST. */
2613 static void
2614 aff_combination_const (struct affine_tree_combination *comb, tree type,
2615 unsigned HOST_WIDE_INT cst)
2617 unsigned prec = TYPE_PRECISION (type);
2619 comb->type = type;
2620 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2622 comb->n = 0;
2623 comb->rest = NULL_TREE;
2624 comb->offset = cst & comb->mask;
2627 /* Sets COMB to single element ELT. */
2629 static void
2630 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2632 unsigned prec = TYPE_PRECISION (type);
2634 comb->type = type;
2635 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2637 comb->n = 1;
2638 comb->elts[0] = elt;
2639 comb->coefs[0] = 1;
2640 comb->rest = NULL_TREE;
2641 comb->offset = 0;
2644 /* Scales COMB by SCALE. */
2646 static void
2647 aff_combination_scale (struct affine_tree_combination *comb,
2648 unsigned HOST_WIDE_INT scale)
2650 unsigned i, j;
2652 if (scale == 1)
2653 return;
2655 if (scale == 0)
2657 aff_combination_const (comb, comb->type, 0);
2658 return;
2661 comb->offset = (scale * comb->offset) & comb->mask;
2662 for (i = 0, j = 0; i < comb->n; i++)
2664 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2665 comb->elts[j] = comb->elts[i];
2666 if (comb->coefs[j] != 0)
2667 j++;
2669 comb->n = j;
2671 if (comb->rest)
2673 if (comb->n < MAX_AFF_ELTS)
2675 comb->coefs[comb->n] = scale;
2676 comb->elts[comb->n] = comb->rest;
2677 comb->rest = NULL_TREE;
2678 comb->n++;
2680 else
2681 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2682 build_int_cst_type (comb->type, scale));
2686 /* Adds ELT * SCALE to COMB. */
2688 static void
2689 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2690 unsigned HOST_WIDE_INT scale)
2692 unsigned i;
2694 if (scale == 0)
2695 return;
2697 for (i = 0; i < comb->n; i++)
2698 if (operand_equal_p (comb->elts[i], elt, 0))
2700 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2701 if (comb->coefs[i])
2702 return;
2704 comb->n--;
2705 comb->coefs[i] = comb->coefs[comb->n];
2706 comb->elts[i] = comb->elts[comb->n];
2708 if (comb->rest)
2710 gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2711 comb->coefs[comb->n] = 1;
2712 comb->elts[comb->n] = comb->rest;
2713 comb->rest = NULL_TREE;
2714 comb->n++;
2716 return;
2718 if (comb->n < MAX_AFF_ELTS)
2720 comb->coefs[comb->n] = scale;
2721 comb->elts[comb->n] = elt;
2722 comb->n++;
2723 return;
2726 if (scale == 1)
2727 elt = fold_convert (comb->type, elt);
2728 else
2729 elt = fold_build2 (MULT_EXPR, comb->type,
2730 fold_convert (comb->type, elt),
2731 build_int_cst_type (comb->type, scale));
2733 if (comb->rest)
2734 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2735 else
2736 comb->rest = elt;
2739 /* Adds COMB2 to COMB1. */
2741 static void
2742 aff_combination_add (struct affine_tree_combination *comb1,
2743 struct affine_tree_combination *comb2)
2745 unsigned i;
2747 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2748 for (i = 0; i < comb2->n; i++)
2749 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2750 if (comb2->rest)
2751 aff_combination_add_elt (comb1, comb2->rest, 1);
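/* Illustrative sketch, not part of GCC: the affine-combination algebra
   above on a toy structure without the mask arithmetic. Scaling multiplies
   the offset and every coefficient; adding merges the coefficients of equal
   elements, so (x + 3) + (-1) * (x + 1) leaves only the offset 2 (the real
   code additionally drops entries whose coefficient becomes zero). */

#define EX_MAX_ELTS 4

struct ex_aff
{
  long offset;
  unsigned n;
  long coefs[EX_MAX_ELTS];
  const char *elts[EX_MAX_ELTS];  /* Element identity, e.g. a variable name. */
};

static void
ex_aff_scale (struct ex_aff *c, long scale)
{
  unsigned i;

  c->offset *= scale;
  for (i = 0; i < c->n; i++)
    c->coefs[i] *= scale;
}

static void
ex_aff_add_elt (struct ex_aff *c, const char *elt, long scale)
{
  unsigned i;

  for (i = 0; i < c->n; i++)
    if (c->elts[i] == elt)
      {
        c->coefs[i] += scale;
        return;
      }
  c->elts[c->n] = elt;
  c->coefs[c->n++] = scale;
}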
2754 /* Splits EXPR into an affine combination of parts. */
2756 static void
2757 tree_to_aff_combination (tree expr, tree type,
2758 struct affine_tree_combination *comb)
2760 struct affine_tree_combination tmp;
2761 enum tree_code code;
2762 tree cst, core, toffset;
2763 HOST_WIDE_INT bitpos, bitsize;
2764 enum machine_mode mode;
2765 int unsignedp, volatilep;
2767 STRIP_NOPS (expr);
2769 code = TREE_CODE (expr);
2770 switch (code)
2772 case INTEGER_CST:
2773 aff_combination_const (comb, type, int_cst_value (expr));
2774 return;
2776 case PLUS_EXPR:
2777 case MINUS_EXPR:
2778 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2779 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2780 if (code == MINUS_EXPR)
2781 aff_combination_scale (&tmp, -1);
2782 aff_combination_add (comb, &tmp);
2783 return;
2785 case MULT_EXPR:
2786 cst = TREE_OPERAND (expr, 1);
2787 if (TREE_CODE (cst) != INTEGER_CST)
2788 break;
2789 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2790 aff_combination_scale (comb, int_cst_value (cst));
2791 return;
2793 case NEGATE_EXPR:
2794 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2795 aff_combination_scale (comb, -1);
2796 return;
2798 case ADDR_EXPR:
2799 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2800 &toffset, &mode, &unsignedp, &volatilep,
2801 false);
2802 if (bitpos % BITS_PER_UNIT != 0)
2803 break;
2804 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2805 core = build_fold_addr_expr (core);
2806 if (TREE_CODE (core) == ADDR_EXPR)
2807 aff_combination_add_elt (comb, core, 1);
2808 else
2810 tree_to_aff_combination (core, type, &tmp);
2811 aff_combination_add (comb, &tmp);
2813 if (toffset)
2815 tree_to_aff_combination (toffset, type, &tmp);
2816 aff_combination_add (comb, &tmp);
2818 return;
2820 default:
2821 break;
2824 aff_combination_elt (comb, type, expr);
2827 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for the width of TYPE. */
2829 static tree
2830 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2831 unsigned HOST_WIDE_INT mask)
2833 enum tree_code code;
2835 scale &= mask;
2836 elt = fold_convert (type, elt);
2838 if (scale == 1)
2840 if (!expr)
2841 return elt;
2843 return fold_build2 (PLUS_EXPR, type, expr, elt);
2846 if (scale == mask)
2848 if (!expr)
2849 return fold_build1 (NEGATE_EXPR, type, elt);
2851 return fold_build2 (MINUS_EXPR, type, expr, elt);
2854 if (!expr)
2855 return fold_build2 (MULT_EXPR, type, elt,
2856 build_int_cst_type (type, scale));
2858 if ((scale | (mask >> 1)) == mask)
2860 /* Scale is negative. */
2861 code = MINUS_EXPR;
2862 scale = (-scale) & mask;
2864 else
2865 code = PLUS_EXPR;
2867 elt = fold_build2 (MULT_EXPR, type, elt,
2868 build_int_cst_type (type, scale));
2869 return fold_build2 (code, type, expr, elt);
2872 /* Copies the tree elements of COMB to ensure that they are not shared. */
2874 static void
2875 unshare_aff_combination (struct affine_tree_combination *comb)
2877 unsigned i;
2879 for (i = 0; i < comb->n; i++)
2880 comb->elts[i] = unshare_expr (comb->elts[i]);
2881 if (comb->rest)
2882 comb->rest = unshare_expr (comb->rest);
2885 /* Makes a tree from the affine combination COMB. */
2887 static tree
2888 aff_combination_to_tree (struct affine_tree_combination *comb)
2890 tree type = comb->type;
2891 tree expr = comb->rest;
2892 unsigned i;
2893 unsigned HOST_WIDE_INT off, sgn;
2895 /* Handle the special case produced by get_computation_aff when
2896 the type does not fit in HOST_WIDE_INT. */
2897 if (comb->n == 0 && comb->offset == 0)
2898 return fold_convert (type, expr);
2900 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2902 for (i = 0; i < comb->n; i++)
2903 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2904 comb->mask);
2906 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2908 /* Offset is negative. */
2909 off = (-comb->offset) & comb->mask;
2910 sgn = comb->mask;
2912 else
2914 off = comb->offset;
2915 sgn = 1;
2917 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2918 comb->mask);
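/* Illustrative sketch, not part of GCC: the test (scale | (mask >> 1)) == mask
   used in add_elt_to_tree and aff_combination_to_tree. MASK has the low PREC
   bits set, so the test asks whether bit PREC - 1 of SCALE is set, i.e.
   whether SCALE is negative in PREC-bit two's complement; if so, emitting
   MINUS of -SCALE gives nicer trees than PLUS of a huge unsigned constant. */

#include <stdbool.h>

static bool
example_scale_is_negative (unsigned long scale, unsigned prec)
{
  unsigned long mask = ((unsigned long) 2 << (prec - 1)) - 1;  /* Low PREC bits. */

  scale &= mask;
  return (scale | (mask >> 1)) == mask;
}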
2921 /* Determines the expression by which USE is expressed from induction variable
2922 CAND at statement AT in LOOP. The expression is stored in the decomposed
2923 form AFF. Returns false if USE cannot be expressed using CAND. */
2925 static bool
2926 get_computation_aff (struct loop *loop,
2927 struct iv_use *use, struct iv_cand *cand, tree at,
2928 struct affine_tree_combination *aff)
2930 tree ubase = use->iv->base;
2931 tree ustep = use->iv->step;
2932 tree cbase = cand->iv->base;
2933 tree cstep = cand->iv->step;
2934 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2935 tree uutype;
2936 tree expr, delta;
2937 tree ratio;
2938 unsigned HOST_WIDE_INT ustepi, cstepi;
2939 HOST_WIDE_INT ratioi;
2940 struct affine_tree_combination cbase_aff, expr_aff;
2941 tree cstep_orig = cstep, ustep_orig = ustep;
2943 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2945 /* We do not have enough precision to express the values of the use. */
2946 return false;
2949 expr = var_at_stmt (loop, cand, at);
2951 if (TREE_TYPE (expr) != ctype)
2953 /* This may happen with the original ivs. */
2954 expr = fold_convert (ctype, expr);
2957 if (TYPE_UNSIGNED (utype))
2958 uutype = utype;
2959 else
2961 uutype = unsigned_type_for (utype);
2962 ubase = fold_convert (uutype, ubase);
2963 ustep = fold_convert (uutype, ustep);
2966 if (uutype != ctype)
2968 expr = fold_convert (uutype, expr);
2969 cbase = fold_convert (uutype, cbase);
2970 cstep = fold_convert (uutype, cstep);
2972 /* If the conversion is not a noop, we must take it into account when
2973 considering the value of the step. */
2974 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2975 cstep_orig = cstep;
2978 if (cst_and_fits_in_hwi (cstep_orig)
2979 && cst_and_fits_in_hwi (ustep_orig))
2981 ustepi = int_cst_value (ustep_orig);
2982 cstepi = int_cst_value (cstep_orig);
2984 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2986 /* TODO maybe consider case when ustep divides cstep and the ratio is
2987 a power of 2 (so that the division is fast to execute)? We would
2988 need to be much more careful with overflows etc. then. */
2989 return false;
2992 ratio = build_int_cst_type (uutype, ratioi);
2994 else
2996 ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
2997 if (!ratio)
2998 return false;
3000 /* Ratioi is only used to detect special cases when the multiplicative
3001 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3002 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
3003 to integer_onep/integer_all_onesp, since the former ignores
3004 TREE_OVERFLOW. */
3005 if (cst_and_fits_in_hwi (ratio))
3006 ratioi = int_cst_value (ratio);
3007 else if (integer_onep (ratio))
3008 ratioi = 1;
3009 else if (integer_all_onesp (ratio))
3010 ratioi = -1;
3011 else
3012 ratioi = 0;
3015 /* We may need to shift the value if we are after the increment. */
3016 if (stmt_after_increment (loop, cand, at))
3017 cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3019 /* use = ubase - ratio * cbase + ratio * var.
3021 In the general case ubase + ratio * (var - cbase) could be better (one less
3022 multiplication), but often it is possible to eliminate redundant parts
3023 of the computation from the (ubase - ratio * cbase) term, and if that does
3024 not happen, fold is able to apply the distributive law to obtain this form
3025 anyway. */
3027 if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3029 /* Let's compute in trees and just return the result in AFF. This case
3030 should not be very common, and fold itself is not that bad either,
3031 so making the aff. functions more complicated to handle this case
3032 is not that urgent. */
3033 if (ratioi == 1)
3035 delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3036 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3038 else if (ratioi == -1)
3040 delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3041 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3043 else
3045 delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3046 delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3047 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3048 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3051 aff->type = uutype;
3052 aff->n = 0;
3053 aff->offset = 0;
3054 aff->mask = 0;
3055 aff->rest = expr;
3056 return true;
3059 /* If we got here, the type fits in HOST_WIDE_INT, thus it must be
3060 possible to compute ratioi. */
3061 gcc_assert (ratioi);
3063 tree_to_aff_combination (ubase, uutype, aff);
3064 tree_to_aff_combination (cbase, uutype, &cbase_aff);
3065 tree_to_aff_combination (expr, uutype, &expr_aff);
3066 aff_combination_scale (&cbase_aff, -ratioi);
3067 aff_combination_scale (&expr_aff, ratioi);
3068 aff_combination_add (aff, &cbase_aff);
3069 aff_combination_add (aff, &expr_aff);
3071 return true;
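/* Illustrative sketch, not part of GCC: the rewrite that get_computation_aff
   performs, checked on plain integers. A use with value ubase + i * ustep is
   recovered from a candidate with value var = cbase + i * cstep as
   ubase - ratio * cbase + ratio * var, where ratio = ustep / cstep is
   assumed to divide exactly. */

#include <assert.h>

static void
example_check_rewrite (long ubase, long ustep, long cbase, long cstep, long i)
{
  long ratio = ustep / cstep;
  long var = cbase + i * cstep;   /* Candidate value in iteration I. */
  long use = ubase + i * ustep;   /* Use value in iteration I. */

  assert (ustep == ratio * cstep);
  assert (use == ubase - ratio * cbase + ratio * var);
}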
3074 /* Determines the expression by which USE is expressed from induction variable
3075 CAND at statement AT in LOOP. The computation is unshared. */
3077 static tree
3078 get_computation_at (struct loop *loop,
3079 struct iv_use *use, struct iv_cand *cand, tree at)
3081 struct affine_tree_combination aff;
3082 tree type = TREE_TYPE (use->iv->base);
3084 if (!get_computation_aff (loop, use, cand, at, &aff))
3085 return NULL_TREE;
3086 unshare_aff_combination (&aff);
3087 return fold_convert (type, aff_combination_to_tree (&aff));
3090 /* Determines the expression by which USE is expressed from induction variable
3091 CAND in LOOP. The computation is unshared. */
3093 static tree
3094 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3096 return get_computation_at (loop, use, cand, use->stmt);
3099 /* Returns cost of addition in MODE. */
3101 static unsigned
3102 add_cost (enum machine_mode mode)
3104 static unsigned costs[NUM_MACHINE_MODES];
3105 rtx seq;
3106 unsigned cost;
3108 if (costs[mode])
3109 return costs[mode];
3111 start_sequence ();
3112 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3113 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3114 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3115 NULL_RTX);
3116 seq = get_insns ();
3117 end_sequence ();
3119 cost = seq_cost (seq);
3120 if (!cost)
3121 cost = 1;
3123 costs[mode] = cost;
3125 if (dump_file && (dump_flags & TDF_DETAILS))
3126 fprintf (dump_file, "Addition in %s costs %d\n",
3127 GET_MODE_NAME (mode), cost);
3128 return cost;
3131 /* Entry in a hashtable of already known costs for multiplication. */
3132 struct mbc_entry
3134 HOST_WIDE_INT cst; /* The constant to multiply by. */
3135 enum machine_mode mode; /* In mode. */
3136 unsigned cost; /* The cost. */
3139 /* Computes the hash value for ENTRY. */
3141 static hashval_t
3142 mbc_entry_hash (const void *entry)
3144 const struct mbc_entry *e = entry;
3146 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3149 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3151 static int
3152 mbc_entry_eq (const void *entry1, const void *entry2)
3154 const struct mbc_entry *e1 = entry1;
3155 const struct mbc_entry *e2 = entry2;
3157 return (e1->mode == e2->mode
3158 && e1->cst == e2->cst);
3161 /* Returns cost of multiplication by constant CST in MODE. */
3163 unsigned
3164 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3166 static htab_t costs;
3167 struct mbc_entry **cached, act;
3168 rtx seq;
3169 unsigned cost;
3171 if (!costs)
3172 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3174 act.mode = mode;
3175 act.cst = cst;
3176 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3177 if (*cached)
3178 return (*cached)->cost;
3180 *cached = XNEW (struct mbc_entry);
3181 (*cached)->mode = mode;
3182 (*cached)->cst = cst;
3184 start_sequence ();
3185 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3186 gen_int_mode (cst, mode), NULL_RTX, 0);
3187 seq = get_insns ();
3188 end_sequence ();
3190 cost = seq_cost (seq);
3192 if (dump_file && (dump_flags & TDF_DETAILS))
3193 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3194 (int) cst, GET_MODE_NAME (mode), cost);
3196 (*cached)->cost = cost;
3198 return cost;
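/* Illustrative sketch, not part of GCC: memoizing a cost keyed by
   (mode, constant) as multiply_by_cost does, but with a small direct-mapped
   cache instead of libiberty's htab. The hash mixing mirrors mbc_entry_hash;
   everything else here is hypothetical. */

#include <stdbool.h>

#define EX_CACHE_SIZE 64

struct ex_mbc { int mode; long cst; unsigned cost; bool valid; };

static struct ex_mbc ex_cache[EX_CACHE_SIZE];

static unsigned
example_multiply_cost (int mode, long cst, unsigned (*compute) (int, long))
{
  unsigned h = (57u * (unsigned) mode + (unsigned) (cst % 877)) % EX_CACHE_SIZE;
  struct ex_mbc *e = &ex_cache[h];

  if (e->valid && e->mode == mode && e->cst == cst)
    return e->cost;

  e->mode = mode;
  e->cst = cst;
  e->cost = compute (mode, cst);
  e->valid = true;
  return e->cost;
}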
3201 /* Returns true if multiplying by RATIO is allowed in an address. */
3203 bool
3204 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3206 #define MAX_RATIO 128
3207 static sbitmap valid_mult;
3209 if (!valid_mult)
3211 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3212 rtx addr;
3213 HOST_WIDE_INT i;
3215 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3216 sbitmap_zero (valid_mult);
3217 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3218 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3220 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3221 if (memory_address_p (Pmode, addr))
3222 SET_BIT (valid_mult, i + MAX_RATIO);
3225 if (dump_file && (dump_flags & TDF_DETAILS))
3227 fprintf (dump_file, " allowed multipliers:");
3228 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3229 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3230 fprintf (dump_file, " %d", (int) i);
3231 fprintf (dump_file, "\n");
3232 fprintf (dump_file, "\n");
3236 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3237 return false;
3239 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3242 /* Returns the cost of an address in the shape symbol + var + OFFSET + RATIO * index.
3243 If SYMBOL_PRESENT is false, the symbol is omitted. If VAR_PRESENT is false,
3244 the variable is omitted. The address is built and costed in Pmode.
3246 TODO -- there must be some better way. This all is quite crude. */
3248 static unsigned
3249 get_address_cost (bool symbol_present, bool var_present,
3250 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3252 static bool initialized = false;
3253 static HOST_WIDE_INT rat, off;
3254 static HOST_WIDE_INT min_offset, max_offset;
3255 static unsigned costs[2][2][2][2];
3256 unsigned cost, acost;
3257 rtx seq, addr, base;
3258 bool offset_p, ratio_p;
3259 rtx reg1;
3260 HOST_WIDE_INT s_offset;
3261 unsigned HOST_WIDE_INT mask;
3262 unsigned bits;
3264 if (!initialized)
3266 HOST_WIDE_INT i;
3267 initialized = true;
3269 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3271 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3272 for (i = 1; i <= 1 << 20; i <<= 1)
3274 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3275 if (!memory_address_p (Pmode, addr))
3276 break;
3278 max_offset = i >> 1;
3279 off = max_offset;
3281 for (i = 1; i <= 1 << 20; i <<= 1)
3283 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3284 if (!memory_address_p (Pmode, addr))
3285 break;
3287 min_offset = -(i >> 1);
3289 if (dump_file && (dump_flags & TDF_DETAILS))
3291 fprintf (dump_file, "get_address_cost:\n");
3292 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3293 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3296 rat = 1;
3297 for (i = 2; i <= MAX_RATIO; i++)
3298 if (multiplier_allowed_in_address_p (i))
3300 rat = i;
3301 break;
3305 bits = GET_MODE_BITSIZE (Pmode);
3306 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3307 offset &= mask;
3308 if ((offset >> (bits - 1) & 1))
3309 offset |= ~mask;
3310 s_offset = offset;
3312 cost = 0;
3313 offset_p = (s_offset != 0
3314 && min_offset <= s_offset && s_offset <= max_offset);
3315 ratio_p = (ratio != 1
3316 && multiplier_allowed_in_address_p (ratio));
3318 if (ratio != 1 && !ratio_p)
3319 cost += multiply_by_cost (ratio, Pmode);
3321 if (s_offset && !offset_p && !symbol_present)
3323 cost += add_cost (Pmode);
3324 var_present = true;
3327 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3328 if (!acost)
3330 int old_cse_not_expected;
3331 acost = 0;
3333 addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3334 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3335 if (ratio_p)
3336 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3338 if (var_present)
3339 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3341 if (symbol_present)
3343 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3344 if (offset_p)
3345 base = gen_rtx_fmt_e (CONST, Pmode,
3346 gen_rtx_fmt_ee (PLUS, Pmode,
3347 base,
3348 gen_int_mode (off, Pmode)));
3350 else if (offset_p)
3351 base = gen_int_mode (off, Pmode);
3352 else
3353 base = NULL_RTX;
3355 if (base)
3356 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3358 start_sequence ();
3359 /* To avoid splitting addressing modes, pretend that no cse will
3360 follow. */
3361 old_cse_not_expected = cse_not_expected;
3362 cse_not_expected = true;
3363 addr = memory_address (Pmode, addr);
3364 cse_not_expected = old_cse_not_expected;
3365 seq = get_insns ();
3366 end_sequence ();
3368 acost = seq_cost (seq);
3369 acost += address_cost (addr, Pmode);
3371 if (!acost)
3372 acost = 1;
3373 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3376 return cost + acost;
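/* Illustrative sketch, not part of GCC: the sign extension applied to OFFSET
   above -- keep the low BITS bits and, if the topmost kept bit is set, fill
   the rest with ones so the value reads as negative. The double shift
   << (bits - 1) << 1 avoids undefined behavior when BITS equals the word
   width. */

#include <stdint.h>

static int64_t
example_sign_extend (uint64_t offset, unsigned bits)
{
  uint64_t mask = ~(~(uint64_t) 0 << (bits - 1) << 1);

  offset &= mask;
  if ((offset >> (bits - 1)) & 1)
    offset |= ~mask;
  return (int64_t) offset;
}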
3379 /* Estimates cost of forcing expression EXPR into a variable. */
3381 unsigned
3382 force_expr_to_var_cost (tree expr)
3384 static bool costs_initialized = false;
3385 static unsigned integer_cost;
3386 static unsigned symbol_cost;
3387 static unsigned address_cost;
3388 tree op0, op1;
3389 unsigned cost0, cost1, cost;
3390 enum machine_mode mode;
3392 if (!costs_initialized)
3394 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3395 rtx x = gen_rtx_MEM (DECL_MODE (var),
3396 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3397 tree addr;
3398 tree type = build_pointer_type (integer_type_node);
3400 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3401 2000));
3403 SET_DECL_RTL (var, x);
3404 TREE_STATIC (var) = 1;
3405 addr = build1 (ADDR_EXPR, type, var);
3406 symbol_cost = computation_cost (addr) + 1;
3408 address_cost
3409 = computation_cost (build2 (PLUS_EXPR, type,
3410 addr,
3411 build_int_cst_type (type, 2000))) + 1;
3412 if (dump_file && (dump_flags & TDF_DETAILS))
3414 fprintf (dump_file, "force_expr_to_var_cost:\n");
3415 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3416 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3417 fprintf (dump_file, " address %d\n", (int) address_cost);
3418 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3419 fprintf (dump_file, "\n");
3422 costs_initialized = true;
3425 STRIP_NOPS (expr);
3427 if (SSA_VAR_P (expr))
3428 return 0;
3430 if (TREE_INVARIANT (expr))
3432 if (TREE_CODE (expr) == INTEGER_CST)
3433 return integer_cost;
3435 if (TREE_CODE (expr) == ADDR_EXPR)
3437 tree obj = TREE_OPERAND (expr, 0);
3439 if (TREE_CODE (obj) == VAR_DECL
3440 || TREE_CODE (obj) == PARM_DECL
3441 || TREE_CODE (obj) == RESULT_DECL)
3442 return symbol_cost;
3445 return address_cost;
3448 switch (TREE_CODE (expr))
3450 case PLUS_EXPR:
3451 case MINUS_EXPR:
3452 case MULT_EXPR:
3453 op0 = TREE_OPERAND (expr, 0);
3454 op1 = TREE_OPERAND (expr, 1);
3455 STRIP_NOPS (op0);
3456 STRIP_NOPS (op1);
3458 if (is_gimple_val (op0))
3459 cost0 = 0;
3460 else
3461 cost0 = force_expr_to_var_cost (op0);
3463 if (is_gimple_val (op1))
3464 cost1 = 0;
3465 else
3466 cost1 = force_expr_to_var_cost (op1);
3468 break;
3470 default:
3471 /* Just an arbitrary value, FIXME. */
3472 return target_spill_cost;
3475 mode = TYPE_MODE (TREE_TYPE (expr));
3476 switch (TREE_CODE (expr))
3478 case PLUS_EXPR:
3479 case MINUS_EXPR:
3480 cost = add_cost (mode);
3481 break;
3483 case MULT_EXPR:
3484 if (cst_and_fits_in_hwi (op0))
3485 cost = multiply_by_cost (int_cst_value (op0), mode);
3486 else if (cst_and_fits_in_hwi (op1))
3487 cost = multiply_by_cost (int_cst_value (op1), mode);
3488 else
3489 return target_spill_cost;
3490 break;
3492 default:
3493 gcc_unreachable ();
3496 cost += cost0;
3497 cost += cost1;
3499 /* Bound the cost by target_spill_cost. The parts of complicated
3500 computations often are either loop invariant or at least can
3501 be shared between several iv uses, so letting this grow without
3502 limits would not give reasonable results. */
3503 return cost < target_spill_cost ? cost : target_spill_cost;
3506 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3507 invariants the computation depends on. */
3509 static unsigned
3510 force_var_cost (struct ivopts_data *data,
3511 tree expr, bitmap *depends_on)
3513 if (depends_on)
3515 fd_ivopts_data = data;
3516 walk_tree (&expr, find_depends, depends_on, NULL);
3519 return force_expr_to_var_cost (expr);
3522 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3523 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3524 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3525 invariants the computation depends on. */
3527 static unsigned
3528 split_address_cost (struct ivopts_data *data,
3529 tree addr, bool *symbol_present, bool *var_present,
3530 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3532 tree core;
3533 HOST_WIDE_INT bitsize;
3534 HOST_WIDE_INT bitpos;
3535 tree toffset;
3536 enum machine_mode mode;
3537 int unsignedp, volatilep;
3539 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3540 &unsignedp, &volatilep, false);
3542 if (toffset != 0
3543 || bitpos % BITS_PER_UNIT != 0
3544 || TREE_CODE (core) != VAR_DECL)
3546 *symbol_present = false;
3547 *var_present = true;
3548 fd_ivopts_data = data;
3549 walk_tree (&addr, find_depends, depends_on, NULL);
3550 return target_spill_cost;
3553 *offset += bitpos / BITS_PER_UNIT;
3554 if (TREE_STATIC (core)
3555 || DECL_EXTERNAL (core))
3557 *symbol_present = true;
3558 *var_present = false;
3559 return 0;
3562 *symbol_present = false;
3563 *var_present = true;
3564 return 0;
3567 /* Estimates cost of expressing difference of addresses E1 - E2 as
3568 var + symbol + offset. The value of offset is added to OFFSET,
3569 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3570 part is missing. DEPENDS_ON is a set of the invariants the computation
3571 depends on. */
3573 static unsigned
3574 ptr_difference_cost (struct ivopts_data *data,
3575 tree e1, tree e2, bool *symbol_present, bool *var_present,
3576 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3578 HOST_WIDE_INT diff = 0;
3579 unsigned cost;
3581 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3583 if (ptr_difference_const (e1, e2, &diff))
3585 *offset += diff;
3586 *symbol_present = false;
3587 *var_present = false;
3588 return 0;
3591 if (e2 == integer_zero_node)
3592 return split_address_cost (data, TREE_OPERAND (e1, 0),
3593 symbol_present, var_present, offset, depends_on);
3595 *symbol_present = false;
3596 *var_present = true;
3598 cost = force_var_cost (data, e1, depends_on);
3599 cost += force_var_cost (data, e2, depends_on);
3600 cost += add_cost (Pmode);
3602 return cost;
3605 /* Estimates cost of expressing difference E1 - E2 as
3606 var + symbol + offset. The value of offset is added to OFFSET,
3607 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3608 part is missing. DEPENDS_ON is a set of the invariants the computation
3609 depends on. */
3611 static unsigned
3612 difference_cost (struct ivopts_data *data,
3613 tree e1, tree e2, bool *symbol_present, bool *var_present,
3614 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3616 unsigned cost;
3617 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3618 unsigned HOST_WIDE_INT off1, off2;
3620 e1 = strip_offset (e1, &off1);
3621 e2 = strip_offset (e2, &off2);
3622 *offset += off1 - off2;
3624 STRIP_NOPS (e1);
3625 STRIP_NOPS (e2);
3627 if (TREE_CODE (e1) == ADDR_EXPR)
3628 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3629 depends_on);
3630 *symbol_present = false;
3632 if (operand_equal_p (e1, e2, 0))
3634 *var_present = false;
3635 return 0;
3637 *var_present = true;
3638 if (zero_p (e2))
3639 return force_var_cost (data, e1, depends_on);
3641 if (zero_p (e1))
3643 cost = force_var_cost (data, e2, depends_on);
3644 cost += multiply_by_cost (-1, mode);
3646 return cost;
3649 cost = force_var_cost (data, e1, depends_on);
3650 cost += force_var_cost (data, e2, depends_on);
3651 cost += add_cost (mode);
3653 return cost;
3656 /* Determines the cost of the computation by which USE is expressed
3657 from induction variable CAND. If ADDRESS_P is true, we just need
3658 to create an address from it, otherwise we want to get it into a
3659 register. A set of invariants we depend on is stored in
3660 DEPENDS_ON. AT is the statement at which the value is computed. */
3662 static unsigned
3663 get_computation_cost_at (struct ivopts_data *data,
3664 struct iv_use *use, struct iv_cand *cand,
3665 bool address_p, bitmap *depends_on, tree at)
3667 tree ubase = use->iv->base, ustep = use->iv->step;
3668 tree cbase, cstep;
3669 tree utype = TREE_TYPE (ubase), ctype;
3670 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3671 HOST_WIDE_INT ratio, aratio;
3672 bool var_present, symbol_present;
3673 unsigned cost = 0, n_sums;
3675 *depends_on = NULL;
3677 /* Only consider real candidates. */
3678 if (!cand->iv)
3679 return INFTY;
3681 cbase = cand->iv->base;
3682 cstep = cand->iv->step;
3683 ctype = TREE_TYPE (cbase);
3685 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3687 /* We do not have a precision to express the values of use. */
3688 return INFTY;
3691 if (address_p)
3693 /* Do not try to express the address of an object with a computation based
3694 on the address of a different object. This may cause problems in RTL-level
3695 alias analysis (which does not expect this to happen, as it is
3696 illegal in C), and would be unlikely to be useful
3697 anyway. */
3698 if (use->iv->base_object
3699 && cand->iv->base_object
3700 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3701 return INFTY;
3704 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3706 /* TODO -- add direct handling of this case. */
3707 goto fallback;
3710 /* CSTEPI is removed from the offset in case the statement is after the
3711 increment. If the step is not constant, we use zero instead.
3712 This is a bit imprecise (there is the extra addition), but
3713 redundancy elimination is likely to transform the code so that
3714 it uses the value of the variable before the increment anyway,
3715 so it is not that unrealistic. */
3716 if (cst_and_fits_in_hwi (cstep))
3717 cstepi = int_cst_value (cstep);
3718 else
3719 cstepi = 0;
3721 if (cst_and_fits_in_hwi (ustep)
3722 && cst_and_fits_in_hwi (cstep))
3724 ustepi = int_cst_value (ustep);
3726 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3727 return INFTY;
3729 else
3731 tree rat;
3733 rat = constant_multiple_of (utype, ustep, cstep);
3735 if (!rat)
3736 return INFTY;
3738 if (cst_and_fits_in_hwi (rat))
3739 ratio = int_cst_value (rat);
3740 else if (integer_onep (rat))
3741 ratio = 1;
3742 else if (integer_all_onesp (rat))
3743 ratio = -1;
3744 else
3745 return INFTY;
3748 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3749 or ratio == 1, it is better to handle this like
3751 ubase - ratio * cbase + ratio * var
3753 (this also holds in the case ratio == -1, TODO). */
3755 if (cst_and_fits_in_hwi (cbase))
3757 offset = - ratio * int_cst_value (cbase);
3758 cost += difference_cost (data,
3759 ubase, integer_zero_node,
3760 &symbol_present, &var_present, &offset,
3761 depends_on);
3763 else if (ratio == 1)
3765 cost += difference_cost (data,
3766 ubase, cbase,
3767 &symbol_present, &var_present, &offset,
3768 depends_on);
3770 else
3772 cost += force_var_cost (data, cbase, depends_on);
3773 cost += add_cost (TYPE_MODE (ctype));
3774 cost += difference_cost (data,
3775 ubase, integer_zero_node,
3776 &symbol_present, &var_present, &offset,
3777 depends_on);
3780 /* If we are after the increment, the value of the candidate is higher by
3781 one iteration. */
3782 if (stmt_after_increment (data->current_loop, cand, at))
3783 offset -= ratio * cstepi;
3785 /* Now the computation is in the shape symbol + var1 + const + ratio * var2
3786 (the symbol/var/const parts may be omitted). If we are looking for an
3787 address, find the cost of addressing this. */
3788 if (address_p)
3789 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3791 /* Otherwise estimate the costs for computing the expression. */
3792 aratio = ratio > 0 ? ratio : -ratio;
3793 if (!symbol_present && !var_present && !offset)
3795 if (ratio != 1)
3796 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3798 return cost;
3801 if (aratio != 1)
3802 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3804 n_sums = 1;
3805 if (var_present
3806 /* Symbol + offset should be compile-time computable. */
3807 && (symbol_present || offset))
3808 n_sums++;
3810 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3812 fallback:
3814 /* Just get the expression, expand it and measure the cost. */
3815 tree comp = get_computation_at (data->current_loop, use, cand, at);
3817 if (!comp)
3818 return INFTY;
3820 if (address_p)
3821 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3823 return computation_cost (comp);
3827 /* Determines the cost of the computation by which USE is expressed
3828 from induction variable CAND. If ADDRESS_P is true, we just need
3829 to create an address from it, otherwise we want to get it into a
3830 register. A set of invariants we depend on is stored in
3831 DEPENDS_ON. */
3833 static unsigned
3834 get_computation_cost (struct ivopts_data *data,
3835 struct iv_use *use, struct iv_cand *cand,
3836 bool address_p, bitmap *depends_on)
3838 return get_computation_cost_at (data,
3839 use, cand, address_p, depends_on, use->stmt);
3842 /* Determines cost of basing replacement of USE on CAND in a generic
3843 expression. */
3845 static bool
3846 determine_use_iv_cost_generic (struct ivopts_data *data,
3847 struct iv_use *use, struct iv_cand *cand)
3849 bitmap depends_on;
3850 unsigned cost;
3852 /* The simple case first -- if we need to express value of the preserved
3853 original biv, the cost is 0. This also prevents us from counting the
3854 cost of increment twice -- once at this use and once in the cost of
3855 the candidate. */
3856 if (cand->pos == IP_ORIGINAL
3857 && cand->incremented_at == use->stmt)
3859 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3860 return true;
3863 cost = get_computation_cost (data, use, cand, false, &depends_on);
3864 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3866 return cost != INFTY;
3869 /* Determines cost of basing replacement of USE on CAND in an address. */
3871 static bool
3872 determine_use_iv_cost_address (struct ivopts_data *data,
3873 struct iv_use *use, struct iv_cand *cand)
3875 bitmap depends_on;
3876 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3878 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3880 return cost != INFTY;
3883 /* Computes value of induction variable IV in iteration NITER. */
3885 static tree
3886 iv_value (struct iv *iv, tree niter)
3888 tree val;
3889 tree type = TREE_TYPE (iv->base);
3891 niter = fold_convert (type, niter);
3892 val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3894 return fold_build2 (PLUS_EXPR, type, iv->base, val);
3897 /* Computes value of candidate CAND at position AT in iteration NITER. */
3899 static tree
3900 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3902 tree val = iv_value (cand->iv, niter);
3903 tree type = TREE_TYPE (cand->iv->base);
3905 if (stmt_after_increment (loop, cand, at))
3906 val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3908 return val;
3911 /* Returns the period of induction variable IV. */
3913 static tree
3914 iv_period (struct iv *iv)
3916 tree step = iv->step, period, type;
3917 tree pow2div;
3919 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3921 /* The period of the iv is the type range divided by gcd (step, type range).
3922 Since the type range is a power of two, it suffices to determine the
3923 maximum power of two that divides the step. */
3924 pow2div = num_ending_zeros (step);
3925 type = unsigned_type_for (TREE_TYPE (step));
3927 period = build_low_bits_mask (type,
3928 (TYPE_PRECISION (type)
3929 - tree_low_cst (pow2div, 1)));
3931 return period;
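/* Illustrative sketch, not part of GCC: iv_period for an unsigned PREC-bit
   type (PREC <= 64) on plain integers. Only the maximal power of two
   dividing STEP matters, since the type range is itself a power of two;
   the result, like build_low_bits_mask above, is the largest N such that
   the iv values in iterations 0 .. N are pairwise distinct. */

#include <stdint.h>

static uint64_t
example_iv_period (uint64_t step, unsigned prec)
{
  unsigned zeros = 0;

  /* Maximum power of two dividing STEP (num_ending_zeros above). */
  while (zeros < prec && !((step >> zeros) & 1))
    zeros++;

  if (prec - zeros >= 64)
    return ~(uint64_t) 0;
  return ((uint64_t) 1 << (prec - zeros)) - 1;
}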
3934 /* Returns the comparison operator used when eliminating the iv USE. */
3936 static enum tree_code
3937 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3939 struct loop *loop = data->current_loop;
3940 basic_block ex_bb;
3941 edge exit;
3943 ex_bb = bb_for_stmt (use->stmt);
3944 exit = EDGE_SUCC (ex_bb, 0);
3945 if (flow_bb_inside_loop_p (loop, exit->dest))
3946 exit = EDGE_SUCC (ex_bb, 1);
3948 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3951 /* Check whether it is possible to express the condition in USE by comparison
3952 of candidate CAND. If so, store the value compared with to BOUND. */
3954 static bool
3955 may_eliminate_iv (struct ivopts_data *data,
3956 struct iv_use *use, struct iv_cand *cand, tree *bound)
3958 basic_block ex_bb;
3959 edge exit;
3960 struct tree_niter_desc *niter;
3961 tree nit, nit_type;
3962 tree wider_type, period, per_type;
3963 struct loop *loop = data->current_loop;
3965 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3966 return false;
3968 /* For now this works only for exits that dominate the loop latch. TODO --
3969 extend it for other conditions inside the loop body. */
3970 ex_bb = bb_for_stmt (use->stmt);
3971 if (use->stmt != last_stmt (ex_bb)
3972 || TREE_CODE (use->stmt) != COND_EXPR)
3973 return false;
3974 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3975 return false;
3977 exit = EDGE_SUCC (ex_bb, 0);
3978 if (flow_bb_inside_loop_p (loop, exit->dest))
3979 exit = EDGE_SUCC (ex_bb, 1);
3980 if (flow_bb_inside_loop_p (loop, exit->dest))
3981 return false;
3983 niter = niter_for_exit (data, exit);
3984 if (!niter
3985 || !zero_p (niter->may_be_zero))
3986 return false;
3988 nit = niter->niter;
3989 nit_type = TREE_TYPE (nit);
3991 /* Determine whether we may use the variable to test whether niter iterations
3992 elapsed. This is the case iff the period of the induction variable is
3993 at least the number of iterations. */
3994 period = iv_period (cand->iv);
3995 if (!period)
3996 return false;
3997 per_type = TREE_TYPE (period);
3999 wider_type = TREE_TYPE (period);
4000 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4001 wider_type = per_type;
4002 else
4003 wider_type = nit_type;
4005 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4006 fold_convert (wider_type, period),
4007 fold_convert (wider_type, nit))))
4008 return false;
4010 *bound = cand_value_at (loop, cand, use->stmt, nit);
4011 return true;
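/* Illustrative sketch, not part of GCC: why the period test above is needed.
   Replacing the exit condition by CAND == BOUND is only sound if the
   candidate does not hit BOUND before iteration NITER; the check below
   verifies this by brute force for an 8-bit candidate BASE + i * STEP,
   and agrees with the rule NITER <= period. */

#include <stdbool.h>
#include <stdint.h>

static bool
example_elimination_sound (uint8_t base, uint8_t step, unsigned niter)
{
  uint8_t bound = (uint8_t) (base + step * niter);
  unsigned i;

  for (i = 0; i < niter; i++)
    if ((uint8_t) (base + step * i) == bound)
      return false;  /* Wrapped early; the loop would exit too soon. */
  return true;
}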
4014 /* Determines cost of basing replacement of USE on CAND in a condition. */
4016 static bool
4017 determine_use_iv_cost_condition (struct ivopts_data *data,
4018 struct iv_use *use, struct iv_cand *cand)
4020 tree bound = NULL_TREE, op, cond;
4021 bitmap depends_on = NULL;
4022 unsigned cost;
4024 /* Only consider real candidates. */
4025 if (!cand->iv)
4027 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4028 return false;
4031 if (may_eliminate_iv (data, use, cand, &bound))
4033 cost = force_var_cost (data, bound, &depends_on);
4035 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4036 return cost != INFTY;
4039 /* The induction variable elimination failed; just express the original
4040 giv. If it is compared with an invariant, note that we cannot get
4041 rid of it. */
4042 cost = get_computation_cost (data, use, cand, false, &depends_on);
4044 cond = *use->op_p;
4045 if (TREE_CODE (cond) != SSA_NAME)
4047 op = TREE_OPERAND (cond, 0);
4048 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4049 op = TREE_OPERAND (cond, 1);
4050 if (TREE_CODE (op) == SSA_NAME)
4052 op = get_iv (data, op)->base;
4053 fd_ivopts_data = data;
4054 walk_tree (&op, find_depends, &depends_on, NULL);
4058 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4059 return cost != INFTY;
4062 /* Determines cost of basing replacement of USE on CAND. Returns false
4063 if USE cannot be based on CAND. */
4065 static bool
4066 determine_use_iv_cost (struct ivopts_data *data,
4067 struct iv_use *use, struct iv_cand *cand)
4069 switch (use->type)
4071 case USE_NONLINEAR_EXPR:
4072 return determine_use_iv_cost_generic (data, use, cand);
4074 case USE_ADDRESS:
4075 return determine_use_iv_cost_address (data, use, cand);
4077 case USE_COMPARE:
4078 return determine_use_iv_cost_condition (data, use, cand);
4080 default:
4081 gcc_unreachable ();
4085 /* Determines costs of basing the use of the iv on an iv candidate. */
4087 static void
4088 determine_use_iv_costs (struct ivopts_data *data)
4090 unsigned i, j;
4091 struct iv_use *use;
4092 struct iv_cand *cand;
4093 bitmap to_clear = BITMAP_ALLOC (NULL);
4095 alloc_use_cost_map (data);
4097 for (i = 0; i < n_iv_uses (data); i++)
4099 use = iv_use (data, i);
4101 if (data->consider_all_candidates)
4103 for (j = 0; j < n_iv_cands (data); j++)
4105 cand = iv_cand (data, j);
4106 determine_use_iv_cost (data, use, cand);
4109 else
4111 bitmap_iterator bi;
4113 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4115 cand = iv_cand (data, j);
4116 if (!determine_use_iv_cost (data, use, cand))
4117 bitmap_set_bit (to_clear, j);
4120 /* Remove the candidates for which the cost is infinite from
4121 the list of related candidates. */
4122 bitmap_and_compl_into (use->related_cands, to_clear);
4123 bitmap_clear (to_clear);
4127 BITMAP_FREE (to_clear);
4129 if (dump_file && (dump_flags & TDF_DETAILS))
4131 fprintf (dump_file, "Use-candidate costs:\n");
4133 for (i = 0; i < n_iv_uses (data); i++)
4135 use = iv_use (data, i);
4137 fprintf (dump_file, "Use %d:\n", i);
4138 fprintf (dump_file, " cand\tcost\tdepends on\n");
4139 for (j = 0; j < use->n_map_members; j++)
4141 if (!use->cost_map[j].cand
4142 || use->cost_map[j].cost == INFTY)
4143 continue;
4145 fprintf (dump_file, " %d\t%d\t",
4146 use->cost_map[j].cand->id,
4147 use->cost_map[j].cost);
4148 if (use->cost_map[j].depends_on)
4149 bitmap_print (dump_file,
4150 use->cost_map[j].depends_on, "","");
4151 fprintf (dump_file, "\n");
4154 fprintf (dump_file, "\n");
4156 fprintf (dump_file, "\n");
4160 /* Determines cost of the candidate CAND. */
4162 static void
4163 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4165 unsigned cost_base, cost_step;
4166 tree base;
4168 if (!cand->iv)
4170 cand->cost = 0;
4171 return;
4174 /* There are two costs associated with the candidate -- its increment
4175 and its initialization. The second is almost negligible for any loop
4176 that rolls enough, so we only take it into account a little. */
4178 base = cand->iv->base;
4179 cost_base = force_var_cost (data, base, NULL);
4180 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4182 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4184 /* Prefer the original iv unless we may gain something by replacing it;
4185 this is not really relevant for artificial ivs created by other
4186 passes. */
4187 if (cand->pos == IP_ORIGINAL
4188 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4189 cand->cost--;
4191 /* Prefer not to insert statements into latch unless there are some
4192 already (so that we do not create unnecessary jumps). */
4193 if (cand->pos == IP_END
4194 && empty_block_p (ip_end_pos (data->current_loop)))
4195 cand->cost++;
4198 /* Determines costs of computation of the candidates. */
4200 static void
4201 determine_iv_costs (struct ivopts_data *data)
4203 unsigned i;
4205 if (dump_file && (dump_flags & TDF_DETAILS))
4207 fprintf (dump_file, "Candidate costs:\n");
4208 fprintf (dump_file, " cand\tcost\n");
4211 for (i = 0; i < n_iv_cands (data); i++)
4213 struct iv_cand *cand = iv_cand (data, i);
4215 determine_iv_cost (data, cand);
4217 if (dump_file && (dump_flags & TDF_DETAILS))
4218 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4221 if (dump_file && (dump_flags & TDF_DETAILS))
4222 fprintf (dump_file, "\n");
4225 /* Calculates cost for having SIZE induction variables. */
4227 static unsigned
4228 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4230 return global_cost_for_size (size,
4231 loop_data (data->current_loop)->regs_used,
4232 n_iv_uses (data));
4235 /* For each size of the induction variable set determine the penalty. */
4237 static void
4238 determine_set_costs (struct ivopts_data *data)
4240 unsigned j, n;
4241 tree phi, op;
4242 struct loop *loop = data->current_loop;
4243 bitmap_iterator bi;
4245 /* We use the following model (definitely improvable, especially the
4246 cost function -- TODO):
4248 We estimate the number of registers available (using MD data), name it A.
4250 We estimate the number of registers used by the loop, name it U. This
4251 number is obtained as the number of loop phi nodes (not counting virtual
4252 registers and bivs) + the number of variables from outside of the loop.
4254 We set a reserve R (free regs that are used for temporary computations,
4255 etc.). For now the reserve is a constant 3.
4257 Let I be the number of induction variables.
4259 -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to
4260 encourage making a lot of ivs without a reason).
4261 -- if A - R < U + I <= A, the cost is I * PRES_COST
4262 -- if U + I > A, the cost is I * PRES_COST and
4263 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4265 if (dump_file && (dump_flags & TDF_DETAILS))
4267 fprintf (dump_file, "Global costs:\n");
4268 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4269 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4270 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4271 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4274 n = 0;
4275 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4277 op = PHI_RESULT (phi);
4279 if (!is_gimple_reg (op))
4280 continue;
4282 if (get_iv (data, op))
4283 continue;
4285 n++;
4288 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4290 struct version_info *info = ver_info (data, j);
4292 if (info->inv_id && info->has_nonlin_use)
4293 n++;
4296 loop_data (loop)->regs_used = n;
4297 if (dump_file && (dump_flags & TDF_DETAILS))
4298 fprintf (dump_file, " regs_used %d\n", n);
4300 if (dump_file && (dump_flags & TDF_DETAILS))
4302 fprintf (dump_file, " cost for size:\n");
4303 fprintf (dump_file, " ivs\tcost\n");
4304 for (j = 0; j <= 2 * target_avail_regs; j++)
4305 fprintf (dump_file, " %d\t%d\n", j,
4306 ivopts_global_cost_for_size (data, j));
4307 fprintf (dump_file, "\n");
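/* Illustrative sketch, not part of GCC: the register-pressure model described
   above as a standalone function. AVAIL is A, REGS_USED is U, N_IVS is I,
   EX_RESERVE is R; the three cost parameters are hypothetical stand-ins for
   the target_*_cost values printed above. */

#define EX_RESERVE 3

static unsigned
example_global_cost (unsigned n_ivs, unsigned regs_used, unsigned n_uses,
                     unsigned avail, unsigned small_cost, unsigned pres_cost,
                     unsigned spill_cost)
{
  unsigned used = regs_used + n_ivs;

  if (used + EX_RESERVE <= avail)
    return n_ivs * small_cost;
  if (used <= avail)
    return n_ivs * pres_cost;
  return n_ivs * pres_cost
         + n_uses * spill_cost * (used - avail) / used;
}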
4311 /* Returns true if A is a cheaper cost pair than B. */
4313 static bool
4314 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4316 if (!a)
4317 return false;
4319 if (!b)
4320 return true;
4322 if (a->cost < b->cost)
4323 return true;
4325 if (a->cost > b->cost)
4326 return false;
4328 /* In case the costs are the same, prefer the cheaper candidate. */
4329 if (a->cand->cost < b->cand->cost)
4330 return true;
4332 return false;
4335 /* Computes the cost field of IVS structure. */
4337 static void
4338 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4340 unsigned cost = 0;
4342 cost += ivs->cand_use_cost;
4343 cost += ivs->cand_cost;
4344 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4346 ivs->cost = cost;
4349 /* Removes the invariants in set INVS from set IVS. */
4351 static void
4352 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4354 bitmap_iterator bi;
4355 unsigned iid;
4357 if (!invs)
4358 return;
4360 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4362 ivs->n_invariant_uses[iid]--;
4363 if (ivs->n_invariant_uses[iid] == 0)
4364 ivs->n_regs--;
4368 /* Set USE not to be expressed by any candidate in IVS. */
4370 static void
4371 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4372 struct iv_use *use)
4374 unsigned uid = use->id, cid;
4375 struct cost_pair *cp;
4377 cp = ivs->cand_for_use[uid];
4378 if (!cp)
4379 return;
4380 cid = cp->cand->id;
4382 ivs->bad_uses++;
4383 ivs->cand_for_use[uid] = NULL;
4384 ivs->n_cand_uses[cid]--;
4386 if (ivs->n_cand_uses[cid] == 0)
4388 bitmap_clear_bit (ivs->cands, cid);
4389 /* Do not count the pseudocandidates. */
4390 if (cp->cand->iv)
4391 ivs->n_regs--;
4392 ivs->n_cands--;
4393 ivs->cand_cost -= cp->cand->cost;
4395 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4398 ivs->cand_use_cost -= cp->cost;
4400 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4401 iv_ca_recount_cost (data, ivs);
4404 /* Adds the invariants in set INVS to set IVS. */
4406 static void
4407 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4409 bitmap_iterator bi;
4410 unsigned iid;
4412 if (!invs)
4413 return;
4415 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4417 ivs->n_invariant_uses[iid]++;
4418 if (ivs->n_invariant_uses[iid] == 1)
4419 ivs->n_regs++;
4423 /* Set cost pair for USE in set IVS to CP. */
4425 static void
4426 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4427 struct iv_use *use, struct cost_pair *cp)
4429 unsigned uid = use->id, cid;
4431 if (ivs->cand_for_use[uid] == cp)
4432 return;
4434 if (ivs->cand_for_use[uid])
4435 iv_ca_set_no_cp (data, ivs, use);
4437 if (cp)
4439 cid = cp->cand->id;
4441 ivs->bad_uses--;
4442 ivs->cand_for_use[uid] = cp;
4443 ivs->n_cand_uses[cid]++;
4444 if (ivs->n_cand_uses[cid] == 1)
4446 bitmap_set_bit (ivs->cands, cid);
4447 /* Do not count the pseudocandidates. */
4448 if (cp->cand->iv)
4449 ivs->n_regs++;
4450 ivs->n_cands++;
4451 ivs->cand_cost += cp->cand->cost;
4453 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4456 ivs->cand_use_cost += cp->cost;
4457 iv_ca_set_add_invariants (ivs, cp->depends_on);
4458 iv_ca_recount_cost (data, ivs);
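/* Illustrative sketch, not part of GCC: the reference counting behind
   iv_ca_set_add_invariants and iv_ca_set_remove_invariants above. An
   invariant occupies a register only while at least one selected cost pair
   depends on it, so n_regs changes only on the 0 <-> 1 transitions of the
   per-invariant use count. */

#define EX_MAX_INVS 32

struct ex_pressure
{
  unsigned n_regs;
  unsigned n_uses[EX_MAX_INVS];  /* Reference count per invariant id. */
};

static void
ex_inv_add (struct ex_pressure *p, unsigned iid)
{
  if (++p->n_uses[iid] == 1)
    p->n_regs++;
}

static void
ex_inv_remove (struct ex_pressure *p, unsigned iid)
{
  if (--p->n_uses[iid] == 0)
    p->n_regs--;
}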
4462 /* Extends the set IVS by expressing USE by one of the candidates in it
4463 if possible. */
4465 static void
4466 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4467 struct iv_use *use)
4469 struct cost_pair *best_cp = NULL, *cp;
4470 bitmap_iterator bi;
4471 unsigned i;
4473 gcc_assert (ivs->upto >= use->id);
4475 if (ivs->upto == use->id)
4477 ivs->upto++;
4478 ivs->bad_uses++;
4481 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4483 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4485 if (cheaper_cost_pair (cp, best_cp))
4486 best_cp = cp;
4489 iv_ca_set_cp (data, ivs, use, best_cp);
4492 /* Get the cost of the candidate assignment IVS.  */
4494 static unsigned
4495 iv_ca_cost (struct iv_ca *ivs)
4497 return (ivs->bad_uses ? INFTY : ivs->cost);
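/* While some use still has no candidate assigned (ivs->bad_uses > 0), the
   whole assignment is priced at INFTY above, so a partial assignment can
   never compare as cheaper than a complete one during the search below.  */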
4500 /* Returns true if all dependences of CP are among invariants in IVS. */
4502 static bool
4503 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4505 unsigned i;
4506 bitmap_iterator bi;
4508 if (!cp->depends_on)
4509 return true;
4511 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4513 if (ivs->n_invariant_uses[i] == 0)
4514 return false;
4517 return true;
4520 /* Creates a record of the change of expressing USE by NEW_CP instead of
4521 OLD_CP, and chains it before NEXT_CHANGE.  */
4523 static struct iv_ca_delta *
4524 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4525 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4527 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4529 change->use = use;
4530 change->old_cp = old_cp;
4531 change->new_cp = new_cp;
4532 change->next_change = next_change;
4534 return change;
4537 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4538 are rewritten. */
4540 static struct iv_ca_delta *
4541 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4543 struct iv_ca_delta *last;
4545 if (!l2)
4546 return l1;
4548 if (!l1)
4549 return l2;
4551 for (last = l1; last->next_change; last = last->next_change)
4552 continue;
4553 last->next_change = l2;
4555 return l1;
4558 /* Returns the cost pair for the candidate by which USE is expressed in IVS.  */
4560 static struct cost_pair *
4561 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4563 return ivs->cand_for_use[use->id];
4566 /* Reverses the list of changes DELTA, forming its inverse.  */
4568 static struct iv_ca_delta *
4569 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4571 struct iv_ca_delta *act, *next, *prev = NULL;
4572 struct cost_pair *tmp;
4574 for (act = delta; act; act = next)
4576 next = act->next_change;
4577 act->next_change = prev;
4578 prev = act;
4580 tmp = act->old_cp;
4581 act->old_cp = act->new_cp;
4582 act->new_cp = tmp;
4585 return prev;
4588 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4589 reverted instead. */
4591 static void
4592 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4593 struct iv_ca_delta *delta, bool forward)
4595 struct cost_pair *from, *to;
4596 struct iv_ca_delta *act;
4598 if (!forward)
4599 delta = iv_ca_delta_reverse (delta);
4601 for (act = delta; act; act = act->next_change)
4603 from = act->old_cp;
4604 to = act->new_cp;
4605 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4606 iv_ca_set_cp (data, ivs, act->use, to);
4609 if (!forward)
4610 iv_ca_delta_reverse (delta);
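/* The delta lists above act as an undo log.  A typical trial -- the pattern
   that iv_ca_extend and iv_ca_narrow below follow -- looks like

       iv_ca_delta_commit (data, ivs, delta, true);
       cost = iv_ca_cost (ivs);
       iv_ca_delta_commit (data, ivs, delta, false);

   i.e. apply the changes tentatively, measure the resulting cost, then roll
   the changes back.  Reverting reverses the list and swaps old_cp/new_cp in
   each entry, then restores both afterwards, so DELTA can still be
   committed later.  */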
4613 /* Returns true if CAND is used in IVS. */
4615 static bool
4616 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4618 return ivs->n_cand_uses[cand->id] > 0;
4621 /* Returns number of induction variable candidates in the set IVS. */
4623 static unsigned
4624 iv_ca_n_cands (struct iv_ca *ivs)
4626 return ivs->n_cands;
4629 /* Free the list of changes DELTA. */
4631 static void
4632 iv_ca_delta_free (struct iv_ca_delta **delta)
4634 struct iv_ca_delta *act, *next;
4636 for (act = *delta; act; act = next)
4638 next = act->next_change;
4639 free (act);
4642 *delta = NULL;
4645 /* Allocates a new iv candidate assignment.  */
4647 static struct iv_ca *
4648 iv_ca_new (struct ivopts_data *data)
4650 struct iv_ca *nw = XNEW (struct iv_ca);
4652 nw->upto = 0;
4653 nw->bad_uses = 0;
4654 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4655 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4656 nw->cands = BITMAP_ALLOC (NULL);
4657 nw->n_cands = 0;
4658 nw->n_regs = 0;
4659 nw->cand_use_cost = 0;
4660 nw->cand_cost = 0;
4661 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4662 nw->cost = 0;
4664 return nw;
4667 /* Free memory occupied by the set IVS. */
4669 static void
4670 iv_ca_free (struct iv_ca **ivs)
4672 free ((*ivs)->cand_for_use);
4673 free ((*ivs)->n_cand_uses);
4674 BITMAP_FREE ((*ivs)->cands);
4675 free ((*ivs)->n_invariant_uses);
4676 free (*ivs);
4677 *ivs = NULL;
4680 /* Dumps IVS to FILE. */
4682 static void
4683 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4685 const char *pref = " invariants ";
4686 unsigned i;
4688 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4689 bitmap_print (file, ivs->cands, " candidates ","\n");
4691 for (i = 1; i <= data->max_inv_id; i++)
4692 if (ivs->n_invariant_uses[i])
4694 fprintf (file, "%s%d", pref, i);
4695 pref = ", ";
4697 fprintf (file, "\n");
4700 /* For each use, try replacing the candidate chosen in IVS by CAND.  Return
4701 the cost of the new set, and store the differences in DELTA.  The number
4702 of induction variables in the new set is stored in N_IVS.  */
4704 static unsigned
4705 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4706 struct iv_cand *cand, struct iv_ca_delta **delta,
4707 unsigned *n_ivs)
4709 unsigned i, cost;
4710 struct iv_use *use;
4711 struct cost_pair *old_cp, *new_cp;
4713 *delta = NULL;
4714 for (i = 0; i < ivs->upto; i++)
4716 use = iv_use (data, i);
4717 old_cp = iv_ca_cand_for_use (ivs, use);
4719 if (old_cp
4720 && old_cp->cand == cand)
4721 continue;
4723 new_cp = get_use_iv_cost (data, use, cand);
4724 if (!new_cp)
4725 continue;
4727 if (!iv_ca_has_deps (ivs, new_cp))
4728 continue;
4730 if (!cheaper_cost_pair (new_cp, old_cp))
4731 continue;
4733 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4736 iv_ca_delta_commit (data, ivs, *delta, true);
4737 cost = iv_ca_cost (ivs);
4738 if (n_ivs)
4739 *n_ivs = iv_ca_n_cands (ivs);
4740 iv_ca_delta_commit (data, ivs, *delta, false);
4742 return cost;
4745 /* Try narrowing set IVS by removing CAND. Return the cost of
4746 the new set and store the differences in DELTA. */
4748 static unsigned
4749 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4750 struct iv_cand *cand, struct iv_ca_delta **delta)
4752 unsigned i, ci;
4753 struct iv_use *use;
4754 struct cost_pair *old_cp, *new_cp, *cp;
4755 bitmap_iterator bi;
4756 struct iv_cand *cnd;
4757 unsigned cost;
4759 *delta = NULL;
4760 for (i = 0; i < n_iv_uses (data); i++)
4762 use = iv_use (data, i);
4764 old_cp = iv_ca_cand_for_use (ivs, use);
4765 if (old_cp->cand != cand)
4766 continue;
4768 new_cp = NULL;
4770 if (data->consider_all_candidates)
4772 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4774 if (ci == cand->id)
4775 continue;
4777 cnd = iv_cand (data, ci);
4779 cp = get_use_iv_cost (data, use, cnd);
4780 if (!cp)
4781 continue;
4782 if (!iv_ca_has_deps (ivs, cp))
4783 continue;
4785 if (!cheaper_cost_pair (cp, new_cp))
4786 continue;
4788 new_cp = cp;
4791 else
4793 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4795 if (ci == cand->id)
4796 continue;
4798 cnd = iv_cand (data, ci);
4800 cp = get_use_iv_cost (data, use, cnd);
4801 if (!cp)
4802 continue;
4803 if (!iv_ca_has_deps (ivs, cp))
4804 continue;
4806 if (!cheaper_cost_pair (cp, new_cp))
4807 continue;
4809 new_cp = cp;
4813 if (!new_cp)
4815 iv_ca_delta_free (delta);
4816 return INFTY;
4819 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4822 iv_ca_delta_commit (data, ivs, *delta, true);
4823 cost = iv_ca_cost (ivs);
4824 iv_ca_delta_commit (data, ivs, *delta, false);
4826 return cost;
4829 /* Try optimizing the set of candidates IVS by removing candidates other
4830 than EXCEPT_CAND from it.  Return the cost of the new set, and store the
4831 differences in DELTA.  */
4833 static unsigned
4834 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4835 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4837 bitmap_iterator bi;
4838 struct iv_ca_delta *act_delta, *best_delta;
4839 unsigned i, best_cost, acost;
4840 struct iv_cand *cand;
4842 best_delta = NULL;
4843 best_cost = iv_ca_cost (ivs);
4845 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4847 cand = iv_cand (data, i);
4849 if (cand == except_cand)
4850 continue;
4852 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4854 if (acost < best_cost)
4856 best_cost = acost;
4857 iv_ca_delta_free (&best_delta);
4858 best_delta = act_delta;
4860 else
4861 iv_ca_delta_free (&act_delta);
4864 if (!best_delta)
4866 *delta = NULL;
4867 return best_cost;
4870 /* Recurse to possibly remove other unnecessary ivs. */
4871 iv_ca_delta_commit (data, ivs, best_delta, true);
4872 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4873 iv_ca_delta_commit (data, ivs, best_delta, false);
4874 *delta = iv_ca_delta_join (best_delta, *delta);
4875 return best_cost;
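/* Each successful narrowing above removes at least one candidate from IVS,
   so the recursion in iv_ca_prune terminates after at most n_cands steps;
   the deltas of the nested calls are joined into the single change list
   handed back to the caller.  */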
4878 /* Tries to extend the set IVS in the best possible way in order
4879 to express USE.  */
4881 static bool
4882 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4883 struct iv_use *use)
4885 unsigned best_cost, act_cost;
4886 unsigned i;
4887 bitmap_iterator bi;
4888 struct iv_cand *cand;
4889 struct iv_ca_delta *best_delta = NULL, *act_delta;
4890 struct cost_pair *cp;
4892 iv_ca_add_use (data, ivs, use);
4893 best_cost = iv_ca_cost (ivs);
4895 cp = iv_ca_cand_for_use (ivs, use);
4896 if (cp)
4898 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4899 iv_ca_set_no_cp (data, ivs, use);
4902 /* First try important candidates; only if that fails, try the use-specific
4903 ones.  Rationale -- in loops with many variables the best choice often
4904 is to use just one generic biv.  If we added many ivs specific to the
4905 uses here, the optimization algorithm later would be likely to get
4906 stuck in a local minimum, thus causing us to create too many ivs.  Going
4907 from few ivs to more seems more likely to be successful -- starting
4908 from few ivs, replacing an expensive use by a specific iv should always
4909 be a win.  */
4910 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4912 cand = iv_cand (data, i);
4914 if (iv_ca_cand_used_p (ivs, cand))
4915 continue;
4917 cp = get_use_iv_cost (data, use, cand);
4918 if (!cp)
4919 continue;
4921 iv_ca_set_cp (data, ivs, use, cp);
4922 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4923 iv_ca_set_no_cp (data, ivs, use);
4924 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4926 if (act_cost < best_cost)
4928 best_cost = act_cost;
4930 iv_ca_delta_free (&best_delta);
4931 best_delta = act_delta;
4933 else
4934 iv_ca_delta_free (&act_delta);
4937 if (best_cost == INFTY)
4939 for (i = 0; i < use->n_map_members; i++)
4941 cp = use->cost_map + i;
4942 cand = cp->cand;
4943 if (!cand)
4944 continue;
4946 /* Already tried this. */
4947 if (cand->important)
4948 continue;
4950 if (iv_ca_cand_used_p (ivs, cand))
4951 continue;
4953 act_delta = NULL;
4954 iv_ca_set_cp (data, ivs, use, cp);
4955 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4956 iv_ca_set_no_cp (data, ivs, use);
4957 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4958 cp, act_delta);
4960 if (act_cost < best_cost)
4962 best_cost = act_cost;
4964 if (best_delta)
4965 iv_ca_delta_free (&best_delta);
4966 best_delta = act_delta;
4968 else
4969 iv_ca_delta_free (&act_delta);
4973 iv_ca_delta_commit (data, ivs, best_delta, true);
4974 iv_ca_delta_free (&best_delta);
4976 return (best_cost != INFTY);
4979 /* Finds an initial assignment of candidates to uses. */
4981 static struct iv_ca *
4982 get_initial_solution (struct ivopts_data *data)
4984 struct iv_ca *ivs = iv_ca_new (data);
4985 unsigned i;
4987 for (i = 0; i < n_iv_uses (data); i++)
4988 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4990 iv_ca_free (&ivs);
4991 return NULL;
4994 return ivs;
4997 /* Tries to improve set of induction variables IVS. */
4999 static bool
5000 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5002 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5003 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5004 struct iv_cand *cand;
5006 /* Try extending the set of induction variables by one. */
5007 for (i = 0; i < n_iv_cands (data); i++)
5009 cand = iv_cand (data, i);
5011 if (iv_ca_cand_used_p (ivs, cand))
5012 continue;
5014 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5015 if (!act_delta)
5016 continue;
5018 /* If we successfully added the candidate and the set is small enough,
5019 try optimizing it by removing other candidates. */
5020 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5022 iv_ca_delta_commit (data, ivs, act_delta, true);
5023 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5024 iv_ca_delta_commit (data, ivs, act_delta, false);
5025 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5028 if (acost < best_cost)
5030 best_cost = acost;
5031 iv_ca_delta_free (&best_delta);
5032 best_delta = act_delta;
5034 else
5035 iv_ca_delta_free (&act_delta);
5038 if (!best_delta)
5040 /* Try removing the candidates from the set instead. */
5041 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5043 /* Nothing more we can do. */
5044 if (!best_delta)
5045 return false;
5048 iv_ca_delta_commit (data, ivs, best_delta, true);
5049 gcc_assert (best_cost == iv_ca_cost (ivs));
5050 iv_ca_delta_free (&best_delta);
5051 return true;
5054 /* Attempts to find the optimal set of induction variables.  We use a simple
5055 greedy heuristic -- we try to replace at most one candidate in the selected
5056 solution, and remove the unused ivs, as long as this improves the cost.  */
5058 static struct iv_ca *
5059 find_optimal_iv_set (struct ivopts_data *data)
5061 unsigned i;
5062 struct iv_ca *set;
5063 struct iv_use *use;
5065 /* Get the initial solution. */
5066 set = get_initial_solution (data);
5067 if (!set)
5069 if (dump_file && (dump_flags & TDF_DETAILS))
5070 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5071 return NULL;
5074 if (dump_file && (dump_flags & TDF_DETAILS))
5076 fprintf (dump_file, "Initial set of candidates:\n");
5077 iv_ca_dump (data, dump_file, set);
5080 while (try_improve_iv_set (data, set))
5082 if (dump_file && (dump_flags & TDF_DETAILS))
5084 fprintf (dump_file, "Improved to:\n");
5085 iv_ca_dump (data, dump_file, set);
5089 if (dump_file && (dump_flags & TDF_DETAILS))
5090 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5092 for (i = 0; i < n_iv_uses (data); i++)
5094 use = iv_use (data, i);
5095 use->selected = iv_ca_cand_for_use (set, use)->cand;
5098 return set;
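/* With TDF_DETAILS enabled (for instance via -fdump-tree-ivopts-details) the
   progress of the search can be followed in the dump file: the initial set
   printed by iv_ca_dump, one "Improved to:" snapshot per successful
   iteration of try_improve_iv_set, and the final cost.  */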
5101 /* Creates a new induction variable corresponding to CAND. */
5103 static void
5104 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5106 block_stmt_iterator incr_pos;
5107 tree base;
5108 bool after = false;
5110 if (!cand->iv)
5111 return;
5113 switch (cand->pos)
5115 case IP_NORMAL:
5116 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5117 break;
5119 case IP_END:
5120 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5121 after = true;
5122 break;
5124 case IP_ORIGINAL:
5125 /* Mark that the iv is preserved. */
5126 name_info (data, cand->var_before)->preserve_biv = true;
5127 name_info (data, cand->var_after)->preserve_biv = true;
5129 /* Rewrite the increment so that it uses var_before directly. */
5130 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5132 return;
5135 gimple_add_tmp_var (cand->var_before);
5136 add_referenced_tmp_var (cand->var_before);
5138 base = unshare_expr (cand->iv->base);
5140 create_iv (base, unshare_expr (cand->iv->step),
5141 cand->var_before, data->current_loop,
5142 &incr_pos, after, &cand->var_before, &cand->var_after);
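/* Of the increment positions handled above, IP_NORMAL emits the increment
   right before the exit test of the loop and IP_END at the end of the latch
   block (the AFTER flag is set so that create_iv inserts after the last
   statement there), while IP_ORIGINAL reuses an existing biv and merely
   marks it as preserved.  */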
5145 /* Creates new induction variables described in SET. */
5147 static void
5148 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5150 unsigned i;
5151 struct iv_cand *cand;
5152 bitmap_iterator bi;
5154 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5156 cand = iv_cand (data, i);
5157 create_new_iv (data, cand);
5161 /* Removes statement STMT (a real statement or a phi node).  If
5162 INCLUDING_DEFINED_NAME is true, also remove the ssa name it defines.  */
5164 static void
5165 remove_statement (tree stmt, bool including_defined_name)
5167 if (TREE_CODE (stmt) == PHI_NODE)
5169 if (!including_defined_name)
5171 /* Prevent the ssa name defined by the statement from being removed. */
5172 SET_PHI_RESULT (stmt, NULL);
5174 remove_phi_node (stmt, NULL_TREE);
5176 else
5178 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5180 bsi_remove (&bsi, true);
5184 /* Rewrites USE (definition of iv used in a nonlinear expression)
5185 using candidate CAND. */
5187 static void
5188 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5189 struct iv_use *use, struct iv_cand *cand)
5191 tree comp;
5192 tree op, stmts, tgt, ass;
5193 block_stmt_iterator bsi, pbsi;
5195   /* An important special case -- if we are asked to express the value of
5196 the original iv by itself, just exit; there is no need to
5197 introduce a new computation (that might also need casting the
5198 variable to unsigned and back). */
5199 if (cand->pos == IP_ORIGINAL
5200 && cand->incremented_at == use->stmt)
5202 tree step, ctype, utype;
5203 enum tree_code incr_code = PLUS_EXPR;
5205 gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5206 gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5208 step = cand->iv->step;
5209 ctype = TREE_TYPE (step);
5210 utype = TREE_TYPE (cand->var_after);
5211 if (TREE_CODE (step) == NEGATE_EXPR)
5213 incr_code = MINUS_EXPR;
5214 step = TREE_OPERAND (step, 0);
5217 /* Check whether we may leave the computation unchanged.
5218 This is the case only if it does not rely on other
5219 computations in the loop -- otherwise, the computation
5220 we rely upon may be removed in remove_unused_ivs,
5221 thus leading to ICE. */
5222 op = TREE_OPERAND (use->stmt, 1);
5223 if (TREE_CODE (op) == PLUS_EXPR
5224 || TREE_CODE (op) == MINUS_EXPR)
5226 if (TREE_OPERAND (op, 0) == cand->var_before)
5227 op = TREE_OPERAND (op, 1);
5228 else if (TREE_CODE (op) == PLUS_EXPR
5229 && TREE_OPERAND (op, 1) == cand->var_before)
5230 op = TREE_OPERAND (op, 0);
5231 else
5232 op = NULL_TREE;
5234 else
5235 op = NULL_TREE;
5237 if (op
5238 && (TREE_CODE (op) == INTEGER_CST
5239 || operand_equal_p (op, step, 0)))
5240 return;
5242 /* Otherwise, add the necessary computations to express
5243 the iv. */
5244 op = fold_convert (ctype, cand->var_before);
5245 comp = fold_convert (utype,
5246 build2 (incr_code, ctype, op,
5247 unshare_expr (step)));
5249 else
5250 comp = get_computation (data->current_loop, use, cand);
5252 switch (TREE_CODE (use->stmt))
5254 case PHI_NODE:
5255 tgt = PHI_RESULT (use->stmt);
5257 /* If we should keep the biv, do not replace it. */
5258 if (name_info (data, tgt)->preserve_biv)
5259 return;
5261 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5262 while (!bsi_end_p (pbsi)
5263 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5265 bsi = pbsi;
5266 bsi_next (&pbsi);
5268 break;
5270 case MODIFY_EXPR:
5271 tgt = TREE_OPERAND (use->stmt, 0);
5272 bsi = bsi_for_stmt (use->stmt);
5273 break;
5275 default:
5276 gcc_unreachable ();
5279 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5281 if (TREE_CODE (use->stmt) == PHI_NODE)
5283 if (stmts)
5284 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5285 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5286 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5287 remove_statement (use->stmt, false);
5288 SSA_NAME_DEF_STMT (tgt) = ass;
5290 else
5292 if (stmts)
5293 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5294 TREE_OPERAND (use->stmt, 1) = op;
5298 /* Replaces ssa name in index IDX by its basic variable. Callback for
5299 for_each_index. */
5301 static bool
5302 idx_remove_ssa_names (tree base, tree *idx,
5303 void *data ATTRIBUTE_UNUSED)
5305 tree *op;
5307 if (TREE_CODE (*idx) == SSA_NAME)
5308 *idx = SSA_NAME_VAR (*idx);
5310 if (TREE_CODE (base) == ARRAY_REF)
5312 op = &TREE_OPERAND (base, 2);
5313 if (*op
5314 && TREE_CODE (*op) == SSA_NAME)
5315 *op = SSA_NAME_VAR (*op);
5316 op = &TREE_OPERAND (base, 3);
5317 if (*op
5318 && TREE_CODE (*op) == SSA_NAME)
5319 *op = SSA_NAME_VAR (*op);
5322 return true;
5325 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5327 static tree
5328 unshare_and_remove_ssa_names (tree ref)
5330 ref = unshare_expr (ref);
5331 for_each_index (&ref, idx_remove_ssa_names, NULL);
5333 return ref;
5336 /* Extract the alias analysis info for the memory reference REF.  There are
5337 several ways this information may be stored, and what precisely its
5338 semantics is depends on the type of the reference; but there is always
5339 one _DECL node hidden somewhere that is used to determine the set of
5340 virtual operands for the reference.  The code below deciphers this
5341 jungle and extracts that single useful piece of information.  */
5343 static tree
5344 get_ref_tag (tree ref, tree orig)
5346 tree var = get_base_address (ref);
5347 tree aref = NULL_TREE, tag, sv;
5348 HOST_WIDE_INT offset, size, maxsize;
5350 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5352 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5353 if (ref)
5354 break;
5357 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5358 return unshare_expr (sv);
5360 if (!var)
5361 return NULL_TREE;
5363 if (TREE_CODE (var) == INDIRECT_REF)
5365 /* In case the base is a dereference of a pointer, first check its name
5366 mem tag, and if it does not have one, use its type mem tag.  */
5367 var = TREE_OPERAND (var, 0);
5368 if (TREE_CODE (var) != SSA_NAME)
5369 return NULL_TREE;
5371 if (SSA_NAME_PTR_INFO (var))
5373 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5374 if (tag)
5375 return tag;
5378 var = SSA_NAME_VAR (var);
5379 tag = var_ann (var)->type_mem_tag;
5380 gcc_assert (tag != NULL_TREE);
5381 return tag;
5383 else
5385 if (!DECL_P (var))
5386 return NULL_TREE;
5388 tag = var_ann (var)->type_mem_tag;
5389 if (tag)
5390 return tag;
5392 return var;
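/* Three cases are distinguished above: for a reference based on a pointer
   dereference, the name mem tag of the pointer is used, falling back to its
   type mem tag; for a reference based on a decl, the decl's type mem tag is
   used if it has one; otherwise the decl itself serves as the tag.  */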
5396 /* Copies the reference information from OLD_REF to NEW_REF. */
5398 static void
5399 copy_ref_info (tree new_ref, tree old_ref)
5401 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5402 copy_mem_ref_info (new_ref, old_ref);
5403 else
5405 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5406 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5410 /* Rewrites USE (address that is an iv) using candidate CAND. */
5412 static void
5413 rewrite_use_address (struct ivopts_data *data,
5414 struct iv_use *use, struct iv_cand *cand)
5416 struct affine_tree_combination aff;
5417 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5418 tree ref;
5420 get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5421 unshare_aff_combination (&aff);
5423 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5424 copy_ref_info (ref, *use->op_p);
5425 *use->op_p = ref;
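/* For illustration (hypothetical dump syntax): an address use such as a[i],
   once expressed by a candidate that steps by the element size, can be
   rewritten into a single TARGET_MEM_REF along the lines of

       MEM[symbol: a, index: ivtmp.5, step: 4]

   which create_mem_ref assembles from the affine combination so that the
   whole address fits one machine addressing mode.  */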
5428 /* Rewrites USE (a condition in which one of the arguments is an iv) using
5429 candidate CAND.  */
5431 static void
5432 rewrite_use_compare (struct ivopts_data *data,
5433 struct iv_use *use, struct iv_cand *cand)
5435 tree comp;
5436 tree *op_p, cond, op, stmts, bound;
5437 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5438 enum tree_code compare;
5439 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5441 bound = cp->value;
5442 if (bound)
5444 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5445 tree var_type = TREE_TYPE (var);
5447 compare = iv_elimination_compare (data, use);
5448 bound = fold_convert (var_type, bound);
5449 op = force_gimple_operand (unshare_expr (bound), &stmts,
5450 true, NULL_TREE);
5452 if (stmts)
5453 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5455 *use->op_p = build2 (compare, boolean_type_node, var, op);
5456 update_stmt (use->stmt);
5457 return;
5460 /* The induction variable elimination failed; just express the original
5461 iv.  */
5462 comp = get_computation (data->current_loop, use, cand);
5464 cond = *use->op_p;
5465 op_p = &TREE_OPERAND (cond, 0);
5466 if (TREE_CODE (*op_p) != SSA_NAME
5467 || zero_p (get_iv (data, *op_p)->step))
5468 op_p = &TREE_OPERAND (cond, 1);
5470 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5471 if (stmts)
5472 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5474 *op_p = op;
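/* As a sketch of the elimination path (hypothetical source): in

       for (i = 0; i < 100; i++)
         *p++ = 0;

   the exit test on i can be replaced by a comparison of the pointer iv
   against its precomputed final value, e.g. p != base + 100; afterwards i
   may become unused and is then deleted by remove_unused_ivs below.  */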
5477 /* Ensure that operand *OP_P may be used at the end of EXIT without
5478 violating loop closed ssa form. */
5480 static void
5481 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5483 basic_block def_bb;
5484 struct loop *def_loop;
5485 tree phi, use;
5487 use = USE_FROM_PTR (op_p);
5488 if (TREE_CODE (use) != SSA_NAME)
5489 return;
5491 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5492 if (!def_bb)
5493 return;
5495 def_loop = def_bb->loop_father;
5496 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5497 return;
5499 /* Try finding a phi node that copies the value out of the loop. */
5500 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5501 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5502 break;
5504 if (!phi)
5506 /* Create such a phi node. */
5507 tree new_name = duplicate_ssa_name (use, NULL);
5509 phi = create_phi_node (new_name, exit->dest);
5510 SSA_NAME_DEF_STMT (new_name) = phi;
5511 add_phi_arg (phi, use, exit);
5514 SET_USE (op_p, PHI_RESULT (phi));
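/* Loop closed ssa form requires that an ssa name defined inside a loop is
   used outside of it only via a phi node on the exit; e.g. a use of x_1
   after the loop must go through some x_2 = PHI <x_1 (exit_bb)>.  The
   function above looks such a phi node up and creates it when missing.  */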
5517 /* Ensure that operands of STMT may be used at the end of EXIT without
5518 violating loop closed ssa form. */
5520 static void
5521 protect_loop_closed_ssa_form (edge exit, tree stmt)
5523 ssa_op_iter iter;
5524 use_operand_p use_p;
5526 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5527 protect_loop_closed_ssa_form_use (exit, use_p);
5530 /* STMTS compute the value of the phi argument OP on the EXIT edge of a
5531 loop.  Arrange things so that they are emitted in the correct place, and
5532 so that the loop closed ssa form is preserved.  */
5534 void
5535 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5537 tree_stmt_iterator tsi;
5538 block_stmt_iterator bsi;
5539 tree phi, stmt, def, next;
5541 if (!single_pred_p (exit->dest))
5542 split_loop_exit_edge (exit);
5544   /* Ensure there is a label in exit->dest, so that we can
5545 insert after it. */
5546 tree_block_label (exit->dest);
5547 bsi = bsi_after_labels (exit->dest);
5549 if (TREE_CODE (stmts) == STATEMENT_LIST)
5551 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5553 tree stmt = tsi_stmt (tsi);
5554 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5555 protect_loop_closed_ssa_form (exit, stmt);
5558 else
5560 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5561 protect_loop_closed_ssa_form (exit, stmts);
5564 if (!op)
5565 return;
5567 for (phi = phi_nodes (exit->dest); phi; phi = next)
5569 next = PHI_CHAIN (phi);
5571 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5573 def = PHI_RESULT (phi);
5574 remove_statement (phi, false);
5575 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5576 def, op);
5577 SSA_NAME_DEF_STMT (def) = stmt;
5578 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5583 /* Rewrites USE using candidate CAND. */
5585 static void
5586 rewrite_use (struct ivopts_data *data,
5587 struct iv_use *use, struct iv_cand *cand)
5589 switch (use->type)
5591 case USE_NONLINEAR_EXPR:
5592 rewrite_use_nonlinear_expr (data, use, cand);
5593 break;
5595 case USE_ADDRESS:
5596 rewrite_use_address (data, use, cand);
5597 break;
5599 case USE_COMPARE:
5600 rewrite_use_compare (data, use, cand);
5601 break;
5603 default:
5604 gcc_unreachable ();
5606 mark_new_vars_to_rename (use->stmt);
5609 /* Rewrite the uses using the selected induction variables. */
5611 static void
5612 rewrite_uses (struct ivopts_data *data)
5614 unsigned i;
5615 struct iv_cand *cand;
5616 struct iv_use *use;
5618 for (i = 0; i < n_iv_uses (data); i++)
5620 use = iv_use (data, i);
5621 cand = use->selected;
5622 gcc_assert (cand);
5624 rewrite_use (data, use, cand);
5628 /* Removes the ivs that are not used after rewriting. */
5630 static void
5631 remove_unused_ivs (struct ivopts_data *data)
5633 unsigned j;
5634 bitmap_iterator bi;
5636 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5638 struct version_info *info;
5640 info = ver_info (data, j);
5641 if (info->iv
5642 && !zero_p (info->iv->step)
5643 && !info->inv_id
5644 && !info->iv->have_use_for
5645 && !info->preserve_biv)
5646 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5650 /* Frees data allocated by the optimization of a single loop. */
5652 static void
5653 free_loop_data (struct ivopts_data *data)
5655 unsigned i, j;
5656 bitmap_iterator bi;
5657 tree obj;
5659 htab_empty (data->niters);
5661 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5663 struct version_info *info;
5665 info = ver_info (data, i);
5666 if (info->iv)
5667 free (info->iv);
5668 info->iv = NULL;
5669 info->has_nonlin_use = false;
5670 info->preserve_biv = false;
5671 info->inv_id = 0;
5673 bitmap_clear (data->relevant);
5674 bitmap_clear (data->important_candidates);
5676 for (i = 0; i < n_iv_uses (data); i++)
5678 struct iv_use *use = iv_use (data, i);
5680 free (use->iv);
5681 BITMAP_FREE (use->related_cands);
5682 for (j = 0; j < use->n_map_members; j++)
5683 if (use->cost_map[j].depends_on)
5684 BITMAP_FREE (use->cost_map[j].depends_on);
5685 free (use->cost_map);
5686 free (use);
5688 VEC_truncate (iv_use_p, data->iv_uses, 0);
5690 for (i = 0; i < n_iv_cands (data); i++)
5692 struct iv_cand *cand = iv_cand (data, i);
5694 if (cand->iv)
5695 free (cand->iv);
5696 if (cand->depends_on)
5697 BITMAP_FREE (cand->depends_on);
5698 free (cand);
5700 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5702 if (data->version_info_size < num_ssa_names)
5704 data->version_info_size = 2 * num_ssa_names;
5705 free (data->version_info);
5706 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5709 data->max_inv_id = 0;
5711 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5712 SET_DECL_RTL (obj, NULL_RTX);
5714 VEC_truncate (tree, decl_rtl_to_reset, 0);
5717 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5718 loop tree. */
5720 static void
5721 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5723 unsigned i;
5725 for (i = 1; i < loops->num; i++)
5726 if (loops->parray[i])
5728 free (loops->parray[i]->aux);
5729 loops->parray[i]->aux = NULL;
5732 free_loop_data (data);
5733 free (data->version_info);
5734 BITMAP_FREE (data->relevant);
5735 BITMAP_FREE (data->important_candidates);
5736 htab_delete (data->niters);
5738 VEC_free (tree, heap, decl_rtl_to_reset);
5739 VEC_free (iv_use_p, heap, data->iv_uses);
5740 VEC_free (iv_cand_p, heap, data->iv_candidates);
5743 /* Optimizes the LOOP. Returns true if anything changed. */
5745 static bool
5746 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5748 bool changed = false;
5749 struct iv_ca *iv_ca;
5750 edge exit;
5752 data->current_loop = loop;
5754 if (dump_file && (dump_flags & TDF_DETAILS))
5756 fprintf (dump_file, "Processing loop %d\n", loop->num);
5758 exit = single_dom_exit (loop);
5759 if (exit)
5761 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5762 exit->src->index, exit->dest->index);
5763 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5764 fprintf (dump_file, "\n");
5767 fprintf (dump_file, "\n");
5770   /* For each ssa name, determine whether it behaves as an induction variable
5771 in some loop. */
5772 if (!find_induction_variables (data))
5773 goto finish;
5775 /* Finds interesting uses (item 1). */
5776 find_interesting_uses (data);
5777 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5778 goto finish;
5780 /* Finds candidates for the induction variables (item 2). */
5781 find_iv_candidates (data);
5783 /* Calculates the costs (item 3, part 1). */
5784 determine_use_iv_costs (data);
5785 determine_iv_costs (data);
5786 determine_set_costs (data);
5788 /* Find the optimal set of induction variables (item 3, part 2). */
5789 iv_ca = find_optimal_iv_set (data);
5790 if (!iv_ca)
5791 goto finish;
5792 changed = true;
5794 /* Create the new induction variables (item 4, part 1). */
5795 create_new_ivs (data, iv_ca);
5796 iv_ca_free (&iv_ca);
5798 /* Rewrite the uses (item 4, part 2). */
5799 rewrite_uses (data);
5801 /* Remove the ivs that are unused after rewriting. */
5802 remove_unused_ivs (data);
5804 /* We have changed the structure of induction variables; it might happen
5805 that definitions in the scev database refer to some of them that were
5806 eliminated. */
5807 scev_reset ();
5809 finish:
5810 free_loop_data (data);
5812 return changed;
5815 /* Main entry point. Optimizes induction variables in LOOPS. */
5817 void
5818 tree_ssa_iv_optimize (struct loops *loops)
5820 struct loop *loop;
5821 struct ivopts_data data;
5823 tree_ssa_iv_optimize_init (loops, &data);
5825 /* Optimize the loops starting with the innermost ones. */
5826 loop = loops->tree_root;
5827 while (loop->inner)
5828 loop = loop->inner;
5830 /* Scan the loops, inner ones first. */
5831 while (loop != loops->tree_root)
5833 if (dump_file && (dump_flags & TDF_DETAILS))
5834 flow_loop_dump (loop, dump_file, NULL, 1);
5836 tree_ssa_iv_optimize_loop (&data, loop);
5838 if (loop->next)
5840 loop = loop->next;
5841 while (loop->inner)
5842 loop = loop->inner;
5844 else
5845 loop = loop->outer;
5848 tree_ssa_iv_optimize_finalize (loops, &data);