gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "tree.h"
  69 #include "rtl.h"
  70 #include "tm_p.h"
  71 #include "hard-reg-set.h"
  72 #include "basic-block.h"
  73 #include "output.h"
  74 #include "diagnostic.h"
  75 #include "tree-flow.h"
  76 #include "tree-dump.h"
  77 #include "timevar.h"
  78 #include "cfgloop.h"
  79 #include "varray.h"
  80 #include "expr.h"
  81 #include "tree-pass.h"
  82 #include "ggc.h"
  83 #include "insn-config.h"
  84 #include "recog.h"
  85 #include "pointer-set.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92 #include "tree-affine.h"
  93 #include "target.h"
  94
  95 /* The infinite cost.  */
  96 #define INFTY 10000000
  97
  98 /* The expected number of loop iterations.  TODO -- use profiling instead of
  99    this.  */
 100 #define AVG_LOOP_NITER(LOOP) 5
 101
 102
 103 /* Representation of the induction variable.  */
 104 struct iv
 105 {
 106   tree base;            /* Initial value of the iv.  */
 107   tree base_object;     /* A memory object to that the induction variable points.  */
 108   tree step;            /* Step of the iv (constant only).  */
 109   tree ssa_name;        /* The ssa name with the value.  */
 110   bool biv_p;           /* Is it a biv?  */
 111   bool have_use_for;    /* Do we already have a use for it?  */
 112   unsigned use_id;      /* The identifier in the use if it is the case.  */
 113 };
 114
 115 /* Per-ssa version information (induction variable descriptions, etc.).  */
 116 struct version_info
 117 {
 118   tree name;            /* The ssa name.  */
 119   struct iv *iv;        /* Induction variable description.  */
 120   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 121                            an expression that is not an induction variable.  */
 122   unsigned inv_id;      /* Id of an invariant.  */
 123   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 124 };
 125
 126 /* Types of uses.  */
 127 enum use_type
 128 {
 129   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 130   USE_ADDRESS,          /* Use in an address.  */
 131   USE_COMPARE           /* Use is a compare.  */
 132 };
 133
 134 /* The candidate - cost pair.  */
 135 struct cost_pair
 136 {
 137   struct iv_cand *cand; /* The candidate.  */
 138   unsigned cost;        /* The cost.  */
 139   bitmap depends_on;    /* The list of invariants that have to be
 140                            preserved.  */
 141   tree value;           /* For final value elimination, the expression for
 142                            the final value of the iv.  For iv elimination,
 143                            the new bound to compare with.  */
 144 };
 145
 146 /* Use.  */
 147 struct iv_use
 148 {
 149   unsigned id;          /* The id of the use.  */
 150   enum use_type type;   /* Type of the use.  */
 151   struct iv *iv;        /* The induction variable it is based on.  */
 152   tree stmt;            /* Statement in that it occurs.  */
 153   tree *op_p;           /* The place where it occurs.  */
 154   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 155                            important ones.  */
 156
 157   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 158   struct cost_pair *cost_map;
 159                         /* The costs wrto the iv candidates.  */
 160
 161   struct iv_cand *selected;
 162                         /* The selected candidate.  */
 163 };
 164
 165 /* The position where the iv is computed.  */
 166 enum iv_position
 167 {
 168   IP_NORMAL,            /* At the end, just before the exit condition.  */
 169   IP_END,               /* At the end of the latch block.  */
 170   IP_ORIGINAL           /* The original biv.  */
 171 };
 172
 173 /* The induction variable candidate.  */
 174 struct iv_cand
 175 {
 176   unsigned id;          /* The number of the candidate.  */
 177   bool important;       /* Whether this is an "important" candidate, i.e. such
 178                            that it should be considered by all uses.  */
 179   enum iv_position pos; /* Where it is computed.  */
 180   tree incremented_at;  /* For original biv, the statement where it is
 181                            incremented.  */
 182   tree var_before;      /* The variable used for it before increment.  */
 183   tree var_after;       /* The variable used for it after increment.  */
 184   struct iv *iv;        /* The value of the candidate.  NULL for
 185                            "pseudocandidate" used to indicate the possibility
 186                            to replace the final value of an iv by direct
 187                            computation of the value.  */
 188   unsigned cost;        /* Cost of the candidate.  */
 189   bitmap depends_on;    /* The list of invariants that are used in step of the
 190                            biv.  */
 191 };
 192
 193 /* The data used by the induction variable optimizations.  */
 194
 195 typedef struct iv_use *iv_use_p;
 196 DEF_VEC_P(iv_use_p);
 197 DEF_VEC_ALLOC_P(iv_use_p,heap);
 198
 199 typedef struct iv_cand *iv_cand_p;
 200 DEF_VEC_P(iv_cand_p);
 201 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 202
 203 struct ivopts_data
 204 {
 205   /* The currently optimized loop.  */
 206   struct loop *current_loop;
 207
 208   /* Number of registers used in it.  */
 209   unsigned regs_used;
 210
 211   /* Numbers of iterations for all exits of the current loop.  */
 212   struct pointer_map_t *niters;
 213
 214   /* The size of version_info array allocated.  */
 215   unsigned version_info_size;
 216
 217   /* The array of information for the ssa names.  */
 218   struct version_info *version_info;
 219
 220   /* The bitmap of indices in version_info whose value was changed.  */
 221   bitmap relevant;
 222
 223   /* The maximum invariant id.  */
 224   unsigned max_inv_id;
 225
 226   /* The uses of induction variables.  */
 227   VEC(iv_use_p,heap) *iv_uses;
 228
 229   /* The candidates.  */
 230   VEC(iv_cand_p,heap) *iv_candidates;
 231
 232   /* A bitmap of important candidates.  */
 233   bitmap important_candidates;
 234
 235   /* Whether to consider just related and important candidates when replacing a
 236      use.  */
 237   bool consider_all_candidates;
 238 };
 239
 240 /* An assignment of iv candidates to uses.  */
 241
 242 struct iv_ca
 243 {
 244   /* The number of uses covered by the assignment.  */
 245   unsigned upto;
 246
 247   /* Number of uses that cannot be expressed by the candidates in the set.  */
 248   unsigned bad_uses;
 249
 250   /* Candidate assigned to a use, together with the related costs.  */
 251   struct cost_pair **cand_for_use;
 252
 253   /* Number of times each candidate is used.  */
 254   unsigned *n_cand_uses;
 255
 256   /* The candidates used.  */
 257   bitmap cands;
 258
 259   /* The number of candidates in the set.  */
 260   unsigned n_cands;
 261
 262   /* Total number of registers needed.  */
 263   unsigned n_regs;
 264
 265   /* Total cost of expressing uses.  */
 266   unsigned cand_use_cost;
 267
 268   /* Total cost of candidates.  */
 269   unsigned cand_cost;
 270
 271   /* Number of times each invariant is used.  */
 272   unsigned *n_invariant_uses;
 273
 274   /* Total cost of the assignment.  */
 275   unsigned cost;
 276 };
 277
 278 /* Difference of two iv candidate assignments.  */
 279
 280 struct iv_ca_delta
 281 {
 282   /* Changed use.  */
 283   struct iv_use *use;
 284
 285   /* An old assignment (for rollback purposes).  */
 286   struct cost_pair *old_cp;
 287
 288   /* A new assignment.  */
 289   struct cost_pair *new_cp;
 290
 291   /* Next change in the list.  */
 292   struct iv_ca_delta *next_change;
 293 };
 294
 295 /* Bound on number of candidates below that all candidates are considered.  */
 296
 297 #define CONSIDER_ALL_CANDIDATES_BOUND \
 298   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 299
 300 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 301    optimizing such a loop would help, and it would take ages).  */
 302
 303 #define MAX_CONSIDERED_USES \
 304   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 305
 306 /* If there are at most this number of ivs in the set, try removing unnecessary
 307    ivs from the set always.  */
 308
 309 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 310   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 311
 312 /* The list of trees for that the decl_rtl field must be reset is stored
 313    here.  */
 314
 315 static VEC(tree,heap) *decl_rtl_to_reset;
 316
 317 /* Number of uses recorded in DATA.  */
 318
 319 static inline unsigned
 320 n_iv_uses (struct ivopts_data *data)
 321 {
 322   return VEC_length (iv_use_p, data->iv_uses);
 323 }
 324
 325 /* Ith use recorded in DATA.  */
 326
 327 static inline struct iv_use *
 328 iv_use (struct ivopts_data *data, unsigned i)
 329 {
 330   return VEC_index (iv_use_p, data->iv_uses, i);
 331 }
 332
 333 /* Number of candidates recorded in DATA.  */
 334
 335 static inline unsigned
 336 n_iv_cands (struct ivopts_data *data)
 337 {
 338   return VEC_length (iv_cand_p, data->iv_candidates);
 339 }
 340
 341 /* Ith candidate recorded in DATA.  */
 342
 343 static inline struct iv_cand *
 344 iv_cand (struct ivopts_data *data, unsigned i)
 345 {
 346   return VEC_index (iv_cand_p, data->iv_candidates, i);
 347 }
 348
 349 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 350
 351 edge
 352 single_dom_exit (struct loop *loop)
 353 {
 354   edge exit = single_exit (loop);
 355
 356   if (!exit)
 357     return NULL;
 358
 359   if (!just_once_each_iteration_p (loop, exit->src))
 360     return NULL;
 361
 362   return exit;
 363 }
 364
 365 /* Dumps information about the induction variable IV to FILE.  */
 366
 367 extern void dump_iv (FILE *, struct iv *);
 368 void
 369 dump_iv (FILE *file, struct iv *iv)
 370 {
 371   if (iv->ssa_name)
 372     {
 373       fprintf (file, "ssa name ");
 374       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 375       fprintf (file, "\n");
 376     }
 377
 378   fprintf (file, "  type ");
 379   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 380   fprintf (file, "\n");
 381
 382   if (iv->step)
 383     {
 384       fprintf (file, "  base ");
 385       print_generic_expr (file, iv->base, TDF_SLIM);
 386       fprintf (file, "\n");
 387
 388       fprintf (file, "  step ");
 389       print_generic_expr (file, iv->step, TDF_SLIM);
 390       fprintf (file, "\n");
 391     }
 392   else
 393     {
 394       fprintf (file, "  invariant ");
 395       print_generic_expr (file, iv->base, TDF_SLIM);
 396       fprintf (file, "\n");
 397     }
 398
 399   if (iv->base_object)
 400     {
 401       fprintf (file, "  base object ");
 402       print_generic_expr (file, iv->base_object, TDF_SLIM);
 403       fprintf (file, "\n");
 404     }
 405
 406   if (iv->biv_p)
 407     fprintf (file, "  is a biv\n");
 408 }
 409
 410 /* Dumps information about the USE to FILE.  */
 411
 412 extern void dump_use (FILE *, struct iv_use *);
 413 void
 414 dump_use (FILE *file, struct iv_use *use)
 415 {
 416   fprintf (file, "use %d\n", use->id);
 417
 418   switch (use->type)
 419     {
 420     case USE_NONLINEAR_EXPR:
 421       fprintf (file, "  generic\n");
 422       break;
 423
 424     case USE_ADDRESS:
 425       fprintf (file, "  address\n");
 426       break;
 427
 428     case USE_COMPARE:
 429       fprintf (file, "  compare\n");
 430       break;
 431
 432     default:
 433       gcc_unreachable ();
 434     }
 435
 436   fprintf (file, "  in statement ");
 437   print_generic_expr (file, use->stmt, TDF_SLIM);
 438   fprintf (file, "\n");
 439
 440   fprintf (file, "  at position ");
 441   if (use->op_p)
 442     print_generic_expr (file, *use->op_p, TDF_SLIM);
 443   fprintf (file, "\n");
 444
 445   dump_iv (file, use->iv);
 446
 447   if (use->related_cands)
 448     {
 449       fprintf (file, "  related candidates ");
 450       dump_bitmap (file, use->related_cands);
 451     }
 452 }
 453
 454 /* Dumps information about the uses to FILE.  */
 455
 456 extern void dump_uses (FILE *, struct ivopts_data *);
 457 void
 458 dump_uses (FILE *file, struct ivopts_data *data)
 459 {
 460   unsigned i;
 461   struct iv_use *use;
 462
 463   for (i = 0; i < n_iv_uses (data); i++)
 464     {
 465       use = iv_use (data, i);
 466
 467       dump_use (file, use);
 468       fprintf (file, "\n");
 469     }
 470 }
 471
 472 /* Dumps information about induction variable candidate CAND to FILE.  */
 473
 474 extern void dump_cand (FILE *, struct iv_cand *);
 475 void
 476 dump_cand (FILE *file, struct iv_cand *cand)
 477 {
 478   struct iv *iv = cand->iv;
 479
 480   fprintf (file, "candidate %d%s\n",
 481            cand->id, cand->important ? " (important)" : "");
 482
 483   if (cand->depends_on)
 484     {
 485       fprintf (file, "  depends on ");
 486       dump_bitmap (file, cand->depends_on);
 487     }
 488
 489   if (!iv)
 490     {
 491       fprintf (file, "  final value replacement\n");
 492       return;
 493     }
 494
 495   switch (cand->pos)
 496     {
 497     case IP_NORMAL:
 498       fprintf (file, "  incremented before exit test\n");
 499       break;
 500
 501     case IP_END:
 502       fprintf (file, "  incremented at end\n");
 503       break;
 504
 505     case IP_ORIGINAL:
 506       fprintf (file, "  original biv\n");
 507       break;
 508     }
 509
 510   dump_iv (file, iv);
 511 }
 512
 513 /* Returns the info for ssa version VER.  */
 514
 515 static inline struct version_info *
 516 ver_info (struct ivopts_data *data, unsigned ver)
 517 {
 518   return data->version_info + ver;
 519 }
 520
 521 /* Returns the info for ssa name NAME.  */
 522
 523 static inline struct version_info *
 524 name_info (struct ivopts_data *data, tree name)
 525 {
 526   return ver_info (data, SSA_NAME_VERSION (name));
 527 }
 528
 529 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 530    emitted in LOOP.  */
 531
 532 static bool
 533 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 534 {
 535   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 536
 537   gcc_assert (bb);
 538
 539   if (sbb == loop->latch)
 540     return true;
 541
 542   if (sbb != bb)
 543     return false;
 544
 545   return stmt == last_stmt (bb);
 546 }
 547
 548 /* Returns true if STMT if after the place where the original induction
 549    variable CAND is incremented.  */
 550
 551 static bool
 552 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 553 {
 554   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 555   basic_block stmt_bb = bb_for_stmt (stmt);
 556   block_stmt_iterator bsi;
 557
 558   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 559     return false;
 560
 561   if (stmt_bb != cand_bb)
 562     return true;
 563
 564   /* Scan the block from the end, since the original ivs are usually
 565      incremented at the end of the loop body.  */
 566   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 567     {
 568       if (bsi_stmt (bsi) == cand->incremented_at)
 569         return false;
 570       if (bsi_stmt (bsi) == stmt)
 571         return true;
 572     }
 573 }
 574
 575 /* Returns true if STMT if after the place where the induction variable
 576    CAND is incremented in LOOP.  */
 577
 578 static bool
 579 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 580 {
 581   switch (cand->pos)
 582     {
 583     case IP_END:
 584       return false;
 585
 586     case IP_NORMAL:
 587       return stmt_after_ip_normal_pos (loop, stmt);
 588
 589     case IP_ORIGINAL:
 590       return stmt_after_ip_original_pos (cand, stmt);
 591
 592     default:
 593       gcc_unreachable ();
 594     }
 595 }
 596
 597 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 598
 599 static bool
 600 abnormal_ssa_name_p (tree exp)
 601 {
 602   if (!exp)
 603     return false;
 604
 605   if (TREE_CODE (exp) != SSA_NAME)
 606     return false;
 607
 608   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 609 }
 610
 611 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 612    abnormal phi node.  Callback for for_each_index.  */
 613
 614 static bool
 615 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 616                                   void *data ATTRIBUTE_UNUSED)
 617 {
 618   if (TREE_CODE (base) == ARRAY_REF)
 619     {
 620       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 621         return false;
 622       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 623         return false;
 624     }
 625
 626   return !abnormal_ssa_name_p (*index);
 627 }
 628
 629 /* Returns true if EXPR contains a ssa name that occurs in an
 630    abnormal phi node.  */
 631
 632 bool
 633 contains_abnormal_ssa_name_p (tree expr)
 634 {
 635   enum tree_code code;
 636   enum tree_code_class codeclass;
 637
 638   if (!expr)
 639     return false;
 640
 641   code = TREE_CODE (expr);
 642   codeclass = TREE_CODE_CLASS (code);
 643
 644   if (code == SSA_NAME)
 645     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 646
 647   if (code == INTEGER_CST
 648       || is_gimple_min_invariant (expr))
 649     return false;
 650
 651   if (code == ADDR_EXPR)
 652     return !for_each_index (&TREE_OPERAND (expr, 0),
 653                             idx_contains_abnormal_ssa_name_p,
 654                             NULL);
 655
 656   switch (codeclass)
 657     {
 658     case tcc_binary:
 659     case tcc_comparison:
 660       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 661         return true;
 662
 663       /* Fallthru.  */
 664     case tcc_unary:
 665       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 666         return true;
 667
 668       break;
 669
 670     default:
 671       gcc_unreachable ();
 672     }
 673
 674   return false;
 675 }
 676
 677 /*  Returns tree describing number of iterations determined from
 678     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 679
 680 static tree
 681 niter_for_exit (struct ivopts_data *data, edge exit)
 682 {
 683   struct tree_niter_desc desc;
 684   tree niter;
 685   void **slot;
 686
 687   if (!data->niters)
 688     {
 689       data->niters = pointer_map_create ();
 690       slot = NULL;
 691     }
 692   else
 693     slot = pointer_map_contains (data->niters, exit);
 694
 695   if (!slot)
 696     {
 697       /* Try to determine number of iterations.  We must know it
 698          unconditionally (i.e., without possibility of # of iterations
 699          being zero).  Also, we cannot safely work with ssa names that
 700          appear in phi nodes on abnormal edges, so that we do not create
 701          overlapping life ranges for them (PR 27283).  */
 702       if (number_of_iterations_exit (data->current_loop,
 703                                      exit, &desc, true)
 704           && integer_zerop (desc.may_be_zero)
 705           && !contains_abnormal_ssa_name_p (desc.niter))
 706         niter = desc.niter;
 707       else
 708         niter = NULL_TREE;
 709
 710       *pointer_map_insert (data->niters, exit) = niter;
 711     }
 712   else
 713     niter = (tree) *slot;
 714
 715   return niter;
 716 }
 717
 718 /* Returns tree describing number of iterations determined from
 719    single dominating exit of DATA->current_loop, or NULL if something
 720    goes wrong.  */
 721
 722 static tree
 723 niter_for_single_dom_exit (struct ivopts_data *data)
 724 {
 725   edge exit = single_dom_exit (data->current_loop);
 726
 727   if (!exit)
 728     return NULL;
 729
 730   return niter_for_exit (data, exit);
 731 }
 732
 733 /* Initializes data structures used by the iv optimization pass, stored
 734    in DATA.  */
 735
 736 static void
 737 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 738 {
 739   data->version_info_size = 2 * num_ssa_names;
 740   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 741   data->relevant = BITMAP_ALLOC (NULL);
 742   data->important_candidates = BITMAP_ALLOC (NULL);
 743   data->max_inv_id = 0;
 744   data->niters = NULL;
 745   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 746   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 747   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 748 }
 749
 750 /* Returns a memory object to that EXPR points.  In case we are able to
 751    determine that it does not point to any such object, NULL is returned.  */
 752
 753 static tree
 754 determine_base_object (tree expr)
 755 {
 756   enum tree_code code = TREE_CODE (expr);
 757   tree base, obj;
 758
 759   /* If this is a pointer casted to any type, we need to determine
 760      the base object for the pointer; so handle conversions before
 761      throwing away non-pointer expressions.  */
 762   if (TREE_CODE (expr) == NOP_EXPR
 763       || TREE_CODE (expr) == CONVERT_EXPR)
 764     return determine_base_object (TREE_OPERAND (expr, 0));
 765
 766   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 767     return NULL_TREE;
 768
 769   switch (code)
 770     {
 771     case INTEGER_CST:
 772       return NULL_TREE;
 773
 774     case ADDR_EXPR:
 775       obj = TREE_OPERAND (expr, 0);
 776       base = get_base_address (obj);
 777
 778       if (!base)
 779         return expr;
 780
 781       if (TREE_CODE (base) == INDIRECT_REF)
 782         return determine_base_object (TREE_OPERAND (base, 0));
 783
 784       return fold_convert (ptr_type_node,
 785                            build_fold_addr_expr (base));
 786
 787     case POINTER_PLUS_EXPR:
 788       return determine_base_object (TREE_OPERAND (expr, 0));
 789
 790     case PLUS_EXPR:
 791     case MINUS_EXPR:
 792       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 793       gcc_unreachable ();
 794
 795     default:
 796       return fold_convert (ptr_type_node, expr);
 797     }
 798 }
 799
 800 /* Allocates an induction variable with given initial value BASE and step STEP
 801    for loop LOOP.  */
 802
 803 static struct iv *
 804 alloc_iv (tree base, tree step)
 805 {
 806   struct iv *iv = XCNEW (struct iv);
 807   gcc_assert (step != NULL_TREE);
 808
 809   iv->base = base;
 810   iv->base_object = determine_base_object (base);
 811   iv->step = step;
 812   iv->biv_p = false;
 813   iv->have_use_for = false;
 814   iv->use_id = 0;
 815   iv->ssa_name = NULL_TREE;
 816
 817   return iv;
 818 }
 819
 820 /* Sets STEP and BASE for induction variable IV.  */
 821
 822 static void
 823 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 824 {
 825   struct version_info *info = name_info (data, iv);
 826
 827   gcc_assert (!info->iv);
 828
 829   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 830   info->iv = alloc_iv (base, step);
 831   info->iv->ssa_name = iv;
 832 }
 833
 834 /* Finds induction variable declaration for VAR.  */
 835
 836 static struct iv *
 837 get_iv (struct ivopts_data *data, tree var)
 838 {
 839   basic_block bb;
 840   tree type = TREE_TYPE (var);
 841
 842   if (!POINTER_TYPE_P (type)
 843       && !INTEGRAL_TYPE_P (type))
 844     return NULL;
 845
 846   if (!name_info (data, var)->iv)
 847     {
 848       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 849
 850       if (!bb
 851           || !flow_bb_inside_loop_p (data->current_loop, bb))
 852         set_iv (data, var, var, build_int_cst (type, 0));
 853     }
 854
 855   return name_info (data, var)->iv;
 856 }
 857
 858 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 859    not define a simple affine biv with nonzero step.  */
 860
 861 static tree
 862 determine_biv_step (tree phi)
 863 {
 864   struct loop *loop = bb_for_stmt (phi)->loop_father;
 865   tree name = PHI_RESULT (phi);
 866   affine_iv iv;
 867
 868   if (!is_gimple_reg (name))
 869     return NULL_TREE;
 870
 871   if (!simple_iv (loop, phi, name, &iv, true))
 872     return NULL_TREE;
 873
 874   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 875 }
 876
 877 /* Finds basic ivs.  */
 878
 879 static bool
 880 find_bivs (struct ivopts_data *data)
 881 {
 882   tree phi, step, type, base;
 883   bool found = false;
 884   struct loop *loop = data->current_loop;
 885
 886   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 887     {
 888       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 889         continue;
 890
 891       step = determine_biv_step (phi);
 892       if (!step)
 893         continue;
 894
 895       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 896       base = expand_simple_operations (base);
 897       if (contains_abnormal_ssa_name_p (base)
 898           || contains_abnormal_ssa_name_p (step))
 899         continue;
 900
 901       type = TREE_TYPE (PHI_RESULT (phi));
 902       base = fold_convert (type, base);
 903       if (step)
 904         step = fold_convert (type, step);
 905
 906       set_iv (data, PHI_RESULT (phi), base, step);
 907       found = true;
 908     }
 909
 910   return found;
 911 }
 912
 913 /* Marks basic ivs.  */
 914
 915 static void
 916 mark_bivs (struct ivopts_data *data)
 917 {
 918   tree phi, var;
 919   struct iv *iv, *incr_iv;
 920   struct loop *loop = data->current_loop;
 921   basic_block incr_bb;
 922
 923   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 924     {
 925       iv = get_iv (data, PHI_RESULT (phi));
 926       if (!iv)
 927         continue;
 928
 929       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
 930       incr_iv = get_iv (data, var);
 931       if (!incr_iv)
 932         continue;
 933
 934       /* If the increment is in the subloop, ignore it.  */
 935       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 936       if (incr_bb->loop_father != data->current_loop
 937           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
 938         continue;
 939
 940       iv->biv_p = true;
 941       incr_iv->biv_p = true;
 942     }
 943 }
 944
 945 /* Checks whether STMT defines a linear induction variable and stores its
 946    parameters to IV.  */
 947
 948 static bool
 949 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
 950 {
 951   tree lhs;
 952   struct loop *loop = data->current_loop;
 953
 954   iv->base = NULL_TREE;
 955   iv->step = NULL_TREE;
 956
 957   if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
 958     return false;
 959
 960   lhs = GIMPLE_STMT_OPERAND (stmt, 0);
 961   if (TREE_CODE (lhs) != SSA_NAME)
 962     return false;
 963
 964   if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
 965     return false;
 966   iv->base = expand_simple_operations (iv->base);
 967
 968   if (contains_abnormal_ssa_name_p (iv->base)
 969       || contains_abnormal_ssa_name_p (iv->step))
 970     return false;
 971
 972   return true;
 973 }
 974
 975 /* Finds general ivs in statement STMT.  */
 976
 977 static void
 978 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
 979 {
 980   affine_iv iv;
 981
 982   if (!find_givs_in_stmt_scev (data, stmt, &iv))
 983     return;
 984
 985   set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
 986 }
 987
 988 /* Finds general ivs in basic block BB.  */
 989
 990 static void
 991 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
 992 {
 993   block_stmt_iterator bsi;
 994
 995   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 996     find_givs_in_stmt (data, bsi_stmt (bsi));
 997 }
 998
 999 /* Finds general ivs.  */
1000
1001 static void
1002 find_givs (struct ivopts_data *data)
1003 {
1004   struct loop *loop = data->current_loop;
1005   basic_block *body = get_loop_body_in_dom_order (loop);
1006   unsigned i;
1007
1008   for (i = 0; i < loop->num_nodes; i++)
1009     find_givs_in_bb (data, body[i]);
1010   free (body);
1011 }
1012
1013 /* For each ssa name defined in LOOP determines whether it is an induction
1014    variable and if so, its initial value and step.  */
1015
1016 static bool
1017 find_induction_variables (struct ivopts_data *data)
1018 {
1019   unsigned i;
1020   bitmap_iterator bi;
1021
1022   if (!find_bivs (data))
1023     return false;
1024
1025   find_givs (data);
1026   mark_bivs (data);
1027
1028   if (dump_file && (dump_flags & TDF_DETAILS))
1029     {
1030       tree niter = niter_for_single_dom_exit (data);
1031
1032       if (niter)
1033         {
1034           fprintf (dump_file, "  number of iterations ");
1035           print_generic_expr (dump_file, niter, TDF_SLIM);
1036           fprintf (dump_file, "\n\n");
1037         };
1038
1039       fprintf (dump_file, "Induction variables:\n\n");
1040
1041       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1042         {
1043           if (ver_info (data, i)->iv)
1044             dump_iv (dump_file, ver_info (data, i)->iv);
1045         }
1046     }
1047
1048   return true;
1049 }
1050
1051 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1052
1053 static struct iv_use *
1054 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1055             tree stmt, enum use_type use_type)
1056 {
1057   struct iv_use *use = XCNEW (struct iv_use);
1058
1059   use->id = n_iv_uses (data);
1060   use->type = use_type;
1061   use->iv = iv;
1062   use->stmt = stmt;
1063   use->op_p = use_p;
1064   use->related_cands = BITMAP_ALLOC (NULL);
1065
1066   /* To avoid showing ssa name in the dumps, if it was not reset by the
1067      caller.  */
1068   iv->ssa_name = NULL_TREE;
1069
1070   if (dump_file && (dump_flags & TDF_DETAILS))
1071     dump_use (dump_file, use);
1072
1073   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1074
1075   return use;
1076 }
1077
1078 /* Checks whether OP is a loop-level invariant and if so, records it.
1079    NONLINEAR_USE is true if the invariant is used in a way we do not
1080    handle specially.  */
1081
1082 static void
1083 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1084 {
1085   basic_block bb;
1086   struct version_info *info;
1087
1088   if (TREE_CODE (op) != SSA_NAME
1089       || !is_gimple_reg (op))
1090     return;
1091
1092   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1093   if (bb
1094       && flow_bb_inside_loop_p (data->current_loop, bb))
1095     return;
1096
1097   info = name_info (data, op);
1098   info->name = op;
1099   info->has_nonlin_use |= nonlinear_use;
1100   if (!info->inv_id)
1101     info->inv_id = ++data->max_inv_id;
1102   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1103 }
1104
1105 /* Checks whether the use OP is interesting and if so, records it.  */
1106
1107 static struct iv_use *
1108 find_interesting_uses_op (struct ivopts_data *data, tree op)
1109 {
1110   struct iv *iv;
1111   struct iv *civ;
1112   tree stmt;
1113   struct iv_use *use;
1114
1115   if (TREE_CODE (op) != SSA_NAME)
1116     return NULL;
1117
1118   iv = get_iv (data, op);
1119   if (!iv)
1120     return NULL;
1121
1122   if (iv->have_use_for)
1123     {
1124       use = iv_use (data, iv->use_id);
1125
1126       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1127       return use;
1128     }
1129
1130   if (integer_zerop (iv->step))
1131     {
1132       record_invariant (data, op, true);
1133       return NULL;
1134     }
1135   iv->have_use_for = true;
1136
1137   civ = XNEW (struct iv);
1138   *civ = *iv;
1139
1140   stmt = SSA_NAME_DEF_STMT (op);
1141   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1142               || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1143
1144   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1145   iv->use_id = use->id;
1146
1147   return use;
1148 }
1149
1150 /* Given a condition *COND_P, checks whether it is a compare of an induction
1151    variable and an invariant.  If this is the case, CONTROL_VAR is set
1152    to location of the iv, BOUND to the location of the invariant,
1153    IV_VAR and IV_BOUND are set to the corresponding induction variable
1154    descriptions, and true is returned.  If this is not the case,
1155    CONTROL_VAR and BOUND are set to the arguments of the condition and
1156    false is returned.  */
1157
1158 static bool
1159 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1160                        tree **control_var, tree **bound,
1161                        struct iv **iv_var, struct iv **iv_bound)
1162 {
1163   /* The nodes returned when COND has just one operand.  Note that you should
1164      not modify anything in BOUND or IV_BOUND because of this.  */
1165   static struct iv const_iv;
1166   static tree zero;
1167   tree cond = *cond_p;
1168   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1169   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1170   bool ret = false;
1171
1172   zero = integer_zero_node;
1173   const_iv.step = integer_zero_node;
1174
1175   if (TREE_CODE (cond) == SSA_NAME)
1176     {
1177       op0 = cond_p;
1178       iv0 = get_iv (data, cond);
1179       ret = (iv0 && !integer_zerop (iv0->step));
1180       goto end;
1181     }
1182
1183   if (!COMPARISON_CLASS_P (cond))
1184     {
1185       op0 = cond_p;
1186       goto end;
1187     }
1188
1189   op0 = &TREE_OPERAND (cond, 0);
1190   op1 = &TREE_OPERAND (cond, 1);
1191   if (TREE_CODE (*op0) == SSA_NAME)
1192     iv0 = get_iv (data, *op0);
1193   if (TREE_CODE (*op1) == SSA_NAME)
1194     iv1 = get_iv (data, *op1);
1195
1196   /* Exactly one of the compared values must be an iv, and the other one must
1197      be an invariant.  */
1198   if (!iv0 || !iv1)
1199     goto end;
1200
1201   if (integer_zerop (iv0->step))
1202     {
1203       /* Control variable may be on the other side.  */
1204       tmp_op = op0; op0 = op1; op1 = tmp_op;
1205       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1206     }
1207   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1208
1209 end:
1210   if (control_var)
1211     *control_var = op0;;
1212   if (iv_var)
1213     *iv_var = iv0;;
1214   if (bound)
1215     *bound = op1;
1216   if (iv_bound)
1217     *iv_bound = iv1;
1218
1219   return ret;
1220 }
1221
1222 /* Checks whether the condition *COND_P in STMT is interesting
1223    and if so, records it.  */
1224
1225 static void
1226 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1227 {
1228   tree *var_p, *bound_p;
1229   struct iv *var_iv, *civ;
1230
1231   if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1232     {
1233       find_interesting_uses_op (data, *var_p);
1234       find_interesting_uses_op (data, *bound_p);
1235       return;
1236     }
1237
1238   civ = XNEW (struct iv);
1239   *civ = *var_iv;
1240   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1241 }
1242
1243 /* Returns true if expression EXPR is obviously invariant in LOOP,
1244    i.e. if all its operands are defined outside of the LOOP.  */
1245
1246 bool
1247 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1248 {
1249   basic_block def_bb;
1250   unsigned i, len;
1251
1252   if (is_gimple_min_invariant (expr))
1253     return true;
1254
1255   if (TREE_CODE (expr) == SSA_NAME)
1256     {
1257       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1258       if (def_bb
1259           && flow_bb_inside_loop_p (loop, def_bb))
1260         return false;
1261
1262       return true;
1263     }
1264
1265   if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1266     return false;
1267
1268   len = TREE_OPERAND_LENGTH (expr);
1269   for (i = 0; i < len; i++)
1270     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1271       return false;
1272
1273   return true;
1274 }
1275
1276 /* Cumulates the steps of indices into DATA and replaces their values with the
1277    initial ones.  Returns false when the value of the index cannot be determined.
1278    Callback for for_each_index.  */
1279
1280 struct ifs_ivopts_data
1281 {
1282   struct ivopts_data *ivopts_data;
1283   tree stmt;
1284   tree step;
1285 };
1286
1287 static bool
1288 idx_find_step (tree base, tree *idx, void *data)
1289 {
1290   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1291   struct iv *iv;
1292   tree step, iv_base, iv_step, lbound, off;
1293   struct loop *loop = dta->ivopts_data->current_loop;
1294
1295   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1296       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1297     return false;
1298
1299   /* If base is a component ref, require that the offset of the reference
1300      be invariant.  */
1301   if (TREE_CODE (base) == COMPONENT_REF)
1302     {
1303       off = component_ref_field_offset (base);
1304       return expr_invariant_in_loop_p (loop, off);
1305     }
1306
1307   /* If base is array, first check whether we will be able to move the
1308      reference out of the loop (in order to take its address in strength
1309      reduction).  In order for this to work we need both lower bound
1310      and step to be loop invariants.  */
1311   if (TREE_CODE (base) == ARRAY_REF)
1312     {
1313       step = array_ref_element_size (base);
1314       lbound = array_ref_low_bound (base);
1315
1316       if (!expr_invariant_in_loop_p (loop, step)
1317           || !expr_invariant_in_loop_p (loop, lbound))
1318         return false;
1319     }
1320
1321   if (TREE_CODE (*idx) != SSA_NAME)
1322     return true;
1323
1324   iv = get_iv (dta->ivopts_data, *idx);
1325   if (!iv)
1326     return false;
1327
1328   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1329           *&x[0], which is not folded and does not trigger the
1330           ARRAY_REF path below.  */
1331   *idx = iv->base;
1332
1333   if (integer_zerop (iv->step))
1334     return true;
1335
1336   if (TREE_CODE (base) == ARRAY_REF)
1337     {
1338       step = array_ref_element_size (base);
1339
1340       /* We only handle addresses whose step is an integer constant.  */
1341       if (TREE_CODE (step) != INTEGER_CST)
1342         return false;
1343     }
1344   else
1345     /* The step for pointer arithmetics already is 1 byte.  */
1346     step = build_int_cst (sizetype, 1);
1347
1348   iv_base = iv->base;
1349   iv_step = iv->step;
1350   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1351                             sizetype, &iv_base, &iv_step, dta->stmt,
1352                             false))
1353     {
1354       /* The index might wrap.  */
1355       return false;
1356     }
1357
1358   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1359   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1360
1361   return true;
1362 }
1363
1364 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1365    object is passed to it in DATA.  */
1366
1367 static bool
1368 idx_record_use (tree base, tree *idx,
1369                 void *vdata)
1370 {
1371   struct ivopts_data *data = (struct ivopts_data *) vdata;
1372   find_interesting_uses_op (data, *idx);
1373   if (TREE_CODE (base) == ARRAY_REF)
1374     {
1375       find_interesting_uses_op (data, array_ref_element_size (base));
1376       find_interesting_uses_op (data, array_ref_low_bound (base));
1377     }
1378   return true;
1379 }
1380
1381 /* Returns true if memory reference REF may be unaligned.  */
1382
1383 static bool
1384 may_be_unaligned_p (tree ref)
1385 {
1386   tree base;
1387   tree base_type;
1388   HOST_WIDE_INT bitsize;
1389   HOST_WIDE_INT bitpos;
1390   tree toffset;
1391   enum machine_mode mode;
1392   int unsignedp, volatilep;
1393   unsigned base_align;
1394
1395   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1396      thus they are not misaligned.  */
1397   if (TREE_CODE (ref) == TARGET_MEM_REF)
1398     return false;
1399
1400   /* The test below is basically copy of what expr.c:normal_inner_ref
1401      does to check whether the object must be loaded by parts when
1402      STRICT_ALIGNMENT is true.  */
1403   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1404                               &unsignedp, &volatilep, true);
1405   base_type = TREE_TYPE (base);
1406   base_align = TYPE_ALIGN (base_type);
1407
1408   if (mode != BLKmode
1409       && (base_align < GET_MODE_ALIGNMENT (mode)
1410           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1411           || bitpos % BITS_PER_UNIT != 0))
1412     return true;
1413
1414   return false;
1415 }
1416
1417 /* Return true if EXPR may be non-addressable.   */
1418
1419 static bool
1420 may_be_nonaddressable_p (tree expr)
1421 {
1422   switch (TREE_CODE (expr))
1423     {
1424     case COMPONENT_REF:
1425       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1426              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1427
1428     case ARRAY_REF:
1429     case ARRAY_RANGE_REF:
1430       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1431
1432     case VIEW_CONVERT_EXPR:
1433       /* This kind of view-conversions may wrap non-addressable objects
1434          and make them look addressable.  After some processing the
1435          non-addressability may be uncovered again, causing ADDR_EXPRs
1436          of inappropriate objects to be built.  */
1437       return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1438              && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1439
1440     default:
1441       break;
1442     }
1443
1444   return false;
1445 }
1446
1447 /* Finds addresses in *OP_P inside STMT.  */
1448
1449 static void
1450 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1451 {
1452   tree base = *op_p, step = build_int_cst (sizetype, 0);
1453   struct iv *civ;
1454   struct ifs_ivopts_data ifs_ivopts_data;
1455
1456   /* Do not play with volatile memory references.  A bit too conservative,
1457      perhaps, but safe.  */
1458   if (stmt_ann (stmt)->has_volatile_ops)
1459     goto fail;
1460
1461   /* Ignore bitfields for now.  Not really something terribly complicated
1462      to handle.  TODO.  */
1463   if (TREE_CODE (base) == BIT_FIELD_REF)
1464     goto fail;
1465
1466   if (may_be_nonaddressable_p (base))
1467     goto fail;
1468
1469   if (STRICT_ALIGNMENT
1470       && may_be_unaligned_p (base))
1471     goto fail;
1472
1473   base = unshare_expr (base);
1474
1475   if (TREE_CODE (base) == TARGET_MEM_REF)
1476     {
1477       tree type = build_pointer_type (TREE_TYPE (base));
1478       tree astep;
1479
1480       if (TMR_BASE (base)
1481           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1482         {
1483           civ = get_iv (data, TMR_BASE (base));
1484           if (!civ)
1485             goto fail;
1486
1487           TMR_BASE (base) = civ->base;
1488           step = civ->step;
1489         }
1490       if (TMR_INDEX (base)
1491           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1492         {
1493           civ = get_iv (data, TMR_INDEX (base));
1494           if (!civ)
1495             goto fail;
1496
1497           TMR_INDEX (base) = civ->base;
1498           astep = civ->step;
1499
1500           if (astep)
1501             {
1502               if (TMR_STEP (base))
1503                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1504
1505               step = fold_build2 (PLUS_EXPR, type, step, astep);
1506             }
1507         }
1508
1509       if (integer_zerop (step))
1510         goto fail;
1511       base = tree_mem_ref_addr (type, base);
1512     }
1513   else
1514     {
1515       ifs_ivopts_data.ivopts_data = data;
1516       ifs_ivopts_data.stmt = stmt;
1517       ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1518       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1519           || integer_zerop (ifs_ivopts_data.step))
1520         goto fail;
1521       step = ifs_ivopts_data.step;
1522
1523       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1524       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1525
1526       base = build_fold_addr_expr (base);
1527
1528       /* Substituting bases of IVs into the base expression might
1529          have caused folding opportunities.  */
1530       if (TREE_CODE (base) == ADDR_EXPR)
1531         {
1532           tree *ref = &TREE_OPERAND (base, 0);
1533           while (handled_component_p (*ref))
1534             ref = &TREE_OPERAND (*ref, 0);
1535           if (TREE_CODE (*ref) == INDIRECT_REF)
1536             *ref = fold_indirect_ref (*ref);
1537         }
1538     }
1539
1540   civ = alloc_iv (base, step);
1541   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1542   return;
1543
1544 fail:
1545   for_each_index (op_p, idx_record_use, data);
1546 }
1547
1548 /* Finds and records invariants used in STMT.  */
1549
1550 static void
1551 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1552 {
1553   ssa_op_iter iter;
1554   use_operand_p use_p;
1555   tree op;
1556
1557   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1558     {
1559       op = USE_FROM_PTR (use_p);
1560       record_invariant (data, op, false);
1561     }
1562 }
1563
1564 /* Finds interesting uses of induction variables in the statement STMT.  */
1565
1566 static void
1567 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1568 {
1569   struct iv *iv;
1570   tree op, lhs, rhs;
1571   ssa_op_iter iter;
1572   use_operand_p use_p;
1573
1574   find_invariants_stmt (data, stmt);
1575
1576   if (TREE_CODE (stmt) == COND_EXPR)
1577     {
1578       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1579       return;
1580     }
1581
1582   if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1583     {
1584       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1585       rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1586
1587       if (TREE_CODE (lhs) == SSA_NAME)
1588         {
1589           /* If the statement defines an induction variable, the uses are not
1590              interesting by themselves.  */
1591
1592           iv = get_iv (data, lhs);
1593
1594           if (iv && !integer_zerop (iv->step))
1595             return;
1596         }
1597
1598       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1599         {
1600         case tcc_comparison:
1601           find_interesting_uses_cond (data, stmt,
1602                                       &GIMPLE_STMT_OPERAND (stmt, 1));
1603           return;
1604
1605         case tcc_reference:
1606           find_interesting_uses_address (data, stmt,
1607                                          &GIMPLE_STMT_OPERAND (stmt, 1));
1608           if (REFERENCE_CLASS_P (lhs))
1609             find_interesting_uses_address (data, stmt,
1610                                            &GIMPLE_STMT_OPERAND (stmt, 0));
1611           return;
1612
1613         default: ;
1614         }
1615
1616       if (REFERENCE_CLASS_P (lhs)
1617           && is_gimple_val (rhs))
1618         {
1619           find_interesting_uses_address (data, stmt,
1620                                          &GIMPLE_STMT_OPERAND (stmt, 0));
1621           find_interesting_uses_op (data, rhs);
1622           return;
1623         }
1624
1625       /* TODO -- we should also handle address uses of type
1626
1627          memory = call (whatever);
1628
1629          and
1630
1631          call (memory).  */
1632     }
1633
1634   if (TREE_CODE (stmt) == PHI_NODE
1635       && bb_for_stmt (stmt) == data->current_loop->header)
1636     {
1637       lhs = PHI_RESULT (stmt);
1638       iv = get_iv (data, lhs);
1639
1640       if (iv && !integer_zerop (iv->step))
1641         return;
1642     }
1643
1644   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1645     {
1646       op = USE_FROM_PTR (use_p);
1647
1648       if (TREE_CODE (op) != SSA_NAME)
1649         continue;
1650
1651       iv = get_iv (data, op);
1652       if (!iv)
1653         continue;
1654
1655       find_interesting_uses_op (data, op);
1656     }
1657 }
1658
1659 /* Finds interesting uses of induction variables outside of loops
1660    on loop exit edge EXIT.  */
1661
1662 static void
1663 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1664 {
1665   tree phi, def;
1666
1667   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1668     {
1669       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1670       if (is_gimple_reg (def))
1671         find_interesting_uses_op (data, def);
1672     }
1673 }
1674
1675 /* Finds uses of the induction variables that are interesting.  */
1676
1677 static void
1678 find_interesting_uses (struct ivopts_data *data)
1679 {
1680   basic_block bb;
1681   block_stmt_iterator bsi;
1682   tree phi;
1683   basic_block *body = get_loop_body (data->current_loop);
1684   unsigned i;
1685   struct version_info *info;
1686   edge e;
1687
1688   if (dump_file && (dump_flags & TDF_DETAILS))
1689     fprintf (dump_file, "Uses:\n\n");
1690
1691   for (i = 0; i < data->current_loop->num_nodes; i++)
1692     {
1693       edge_iterator ei;
1694       bb = body[i];
1695
1696       FOR_EACH_EDGE (e, ei, bb->succs)
1697         if (e->dest != EXIT_BLOCK_PTR
1698             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1699           find_interesting_uses_outside (data, e);
1700
1701       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1702         find_interesting_uses_stmt (data, phi);
1703       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1704         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1705     }
1706
1707   if (dump_file && (dump_flags & TDF_DETAILS))
1708     {
1709       bitmap_iterator bi;
1710
1711       fprintf (dump_file, "\n");
1712
1713       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1714         {
1715           info = ver_info (data, i);
1716           if (info->inv_id)
1717             {
1718               fprintf (dump_file, "  ");
1719               print_generic_expr (dump_file, info->name, TDF_SLIM);
1720               fprintf (dump_file, " is invariant (%d)%s\n",
1721                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1722             }
1723         }
1724
1725       fprintf (dump_file, "\n");
1726     }
1727
1728   free (body);
1729 }
1730
1731 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1732    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1733    we are at the top-level of the processed address.  */
1734
1735 static tree
1736 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1737                 unsigned HOST_WIDE_INT *offset)
1738 {
1739   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1740   enum tree_code code;
1741   tree type, orig_type = TREE_TYPE (expr);
1742   unsigned HOST_WIDE_INT off0, off1, st;
1743   tree orig_expr = expr;
1744
1745   STRIP_NOPS (expr);
1746
1747   type = TREE_TYPE (expr);
1748   code = TREE_CODE (expr);
1749   *offset = 0;
1750
1751   switch (code)
1752     {
1753     case INTEGER_CST:
1754       if (!cst_and_fits_in_hwi (expr)
1755           || integer_zerop (expr))
1756         return orig_expr;
1757
1758       *offset = int_cst_value (expr);
1759       return build_int_cst (orig_type, 0);
1760
1761     case POINTER_PLUS_EXPR:
1762     case PLUS_EXPR:
1763     case MINUS_EXPR:
1764       op0 = TREE_OPERAND (expr, 0);
1765       op1 = TREE_OPERAND (expr, 1);
1766
1767       op0 = strip_offset_1 (op0, false, false, &off0);
1768       op1 = strip_offset_1 (op1, false, false, &off1);
1769
1770       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1771       if (op0 == TREE_OPERAND (expr, 0)
1772           && op1 == TREE_OPERAND (expr, 1))
1773         return orig_expr;
1774
1775       if (integer_zerop (op1))
1776         expr = op0;
1777       else if (integer_zerop (op0))
1778         {
1779           if (code == MINUS_EXPR)
1780             expr = fold_build1 (NEGATE_EXPR, type, op1);
1781           else
1782             expr = op1;
1783         }
1784       else
1785         expr = fold_build2 (code, type, op0, op1);
1786
1787       return fold_convert (orig_type, expr);
1788
1789     case ARRAY_REF:
1790       if (!inside_addr)
1791         return orig_expr;
1792
1793       step = array_ref_element_size (expr);
1794       if (!cst_and_fits_in_hwi (step))
1795         break;
1796
1797       st = int_cst_value (step);
1798       op1 = TREE_OPERAND (expr, 1);
1799       op1 = strip_offset_1 (op1, false, false, &off1);
1800       *offset = off1 * st;
1801
1802       if (top_compref
1803           && integer_zerop (op1))
1804         {
1805           /* Strip the component reference completely.  */
1806           op0 = TREE_OPERAND (expr, 0);
1807           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1808           *offset += off0;
1809           return op0;
1810         }
1811       break;
1812
1813     case COMPONENT_REF:
1814       if (!inside_addr)
1815         return orig_expr;
1816
1817       tmp = component_ref_field_offset (expr);
1818       if (top_compref
1819           && cst_and_fits_in_hwi (tmp))
1820         {
1821           /* Strip the component reference completely.  */
1822           op0 = TREE_OPERAND (expr, 0);
1823           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1824           *offset = off0 + int_cst_value (tmp);
1825           return op0;
1826         }
1827       break;
1828
1829     case ADDR_EXPR:
1830       op0 = TREE_OPERAND (expr, 0);
1831       op0 = strip_offset_1 (op0, true, true, &off0);
1832       *offset += off0;
1833
1834       if (op0 == TREE_OPERAND (expr, 0))
1835         return orig_expr;
1836
1837       expr = build_fold_addr_expr (op0);
1838       return fold_convert (orig_type, expr);
1839
1840     case INDIRECT_REF:
1841       inside_addr = false;
1842       break;
1843
1844     default:
1845       return orig_expr;
1846     }
1847
1848   /* Default handling of expressions for that we want to recurse into
1849      the first operand.  */
1850   op0 = TREE_OPERAND (expr, 0);
1851   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1852   *offset += off0;
1853
1854   if (op0 == TREE_OPERAND (expr, 0)
1855       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1856     return orig_expr;
1857
1858   expr = copy_node (expr);
1859   TREE_OPERAND (expr, 0) = op0;
1860   if (op1)
1861     TREE_OPERAND (expr, 1) = op1;
1862
1863   /* Inside address, we might strip the top level component references,
1864      thus changing type of the expression.  Handling of ADDR_EXPR
1865      will fix that.  */
1866   expr = fold_convert (orig_type, expr);
1867
1868   return expr;
1869 }
1870
1871 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1872
1873 static tree
1874 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1875 {
1876   return strip_offset_1 (expr, false, false, offset);
1877 }
1878
1879 /* Returns variant of TYPE that can be used as base for different uses.
1880    We return unsigned type with the same precision, which avoids problems
1881    with overflows.  */
1882
1883 static tree
1884 generic_type_for (tree type)
1885 {
1886   if (POINTER_TYPE_P (type))
1887     return unsigned_type_for (type);
1888
1889   if (TYPE_UNSIGNED (type))
1890     return type;
1891
1892   return unsigned_type_for (type);
1893 }
1894
1895 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1896    the bitmap to that we should store it.  */
1897
1898 static struct ivopts_data *fd_ivopts_data;
1899 static tree
1900 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1901 {
1902   bitmap *depends_on = (bitmap *) data;
1903   struct version_info *info;
1904
1905   if (TREE_CODE (*expr_p) != SSA_NAME)
1906     return NULL_TREE;
1907   info = name_info (fd_ivopts_data, *expr_p);
1908
1909   if (!info->inv_id || info->has_nonlin_use)
1910     return NULL_TREE;
1911
1912   if (!*depends_on)
1913     *depends_on = BITMAP_ALLOC (NULL);
1914   bitmap_set_bit (*depends_on, info->inv_id);
1915
1916   return NULL_TREE;
1917 }
1918
1919 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1920    position to POS.  If USE is not NULL, the candidate is set as related to
1921    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1922    replacement of the final value of the iv by a direct computation.  */
1923
1924 static struct iv_cand *
1925 add_candidate_1 (struct ivopts_data *data,
1926                  tree base, tree step, bool important, enum iv_position pos,
1927                  struct iv_use *use, tree incremented_at)
1928 {
1929   unsigned i;
1930   struct iv_cand *cand = NULL;
1931   tree type, orig_type;
1932
1933   if (base)
1934     {
1935       orig_type = TREE_TYPE (base);
1936       type = generic_type_for (orig_type);
1937       if (type != orig_type)
1938         {
1939           base = fold_convert (type, base);
1940           step = fold_convert (type, step);
1941         }
1942     }
1943
1944   for (i = 0; i < n_iv_cands (data); i++)
1945     {
1946       cand = iv_cand (data, i);
1947
1948       if (cand->pos != pos)
1949         continue;
1950
1951       if (cand->incremented_at != incremented_at)
1952         continue;
1953
1954       if (!cand->iv)
1955         {
1956           if (!base && !step)
1957             break;
1958
1959           continue;
1960         }
1961
1962       if (!base && !step)
1963         continue;
1964
1965       if (operand_equal_p (base, cand->iv->base, 0)
1966           && operand_equal_p (step, cand->iv->step, 0))
1967         break;
1968     }
1969
1970   if (i == n_iv_cands (data))
1971     {
1972       cand = XCNEW (struct iv_cand);
1973       cand->id = i;
1974
1975       if (!base && !step)
1976         cand->iv = NULL;
1977       else
1978         cand->iv = alloc_iv (base, step);
1979
1980       cand->pos = pos;
1981       if (pos != IP_ORIGINAL && cand->iv)
1982         {
1983           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1984           cand->var_after = cand->var_before;
1985         }
1986       cand->important = important;
1987       cand->incremented_at = incremented_at;
1988       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
1989
1990       if (step
1991           && TREE_CODE (step) != INTEGER_CST)
1992         {
1993           fd_ivopts_data = data;
1994           walk_tree (&step, find_depends, &cand->depends_on, NULL);
1995         }
1996
1997       if (dump_file && (dump_flags & TDF_DETAILS))
1998         dump_cand (dump_file, cand);
1999     }
2000
2001   if (important && !cand->important)
2002     {
2003       cand->important = true;
2004       if (dump_file && (dump_flags & TDF_DETAILS))
2005         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2006     }
2007
2008   if (use)
2009     {
2010       bitmap_set_bit (use->related_cands, i);
2011       if (dump_file && (dump_flags & TDF_DETAILS))
2012         fprintf (dump_file, "Candidate %d is related to use %d\n",
2013                  cand->id, use->id);
2014     }
2015
2016   return cand;
2017 }
2018
2019 /* Returns true if incrementing the induction variable at the end of the LOOP
2020    is allowed.
2021
2022    The purpose is to avoid splitting latch edge with a biv increment, thus
2023    creating a jump, possibly confusing other optimization passes and leaving
2024    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2025    is not available (so we do not have a better alternative), or if the latch
2026    edge is already nonempty.  */
2027
2028 static bool
2029 allow_ip_end_pos_p (struct loop *loop)
2030 {
2031   if (!ip_normal_pos (loop))
2032     return true;
2033
2034   if (!empty_block_p (ip_end_pos (loop)))
2035     return true;
2036
2037   return false;
2038 }
2039
2040 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2041    position to POS.  If USE is not NULL, the candidate is set as related to
2042    it.  The candidate computation is scheduled on all available positions.  */
2043
2044 static void
2045 add_candidate (struct ivopts_data *data,
2046                tree base, tree step, bool important, struct iv_use *use)
2047 {
2048   if (ip_normal_pos (data->current_loop))
2049     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2050   if (ip_end_pos (data->current_loop)
2051       && allow_ip_end_pos_p (data->current_loop))
2052     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2053 }
2054
2055 /* Add a standard "0 + 1 * iteration" iv candidate for a
2056    type with SIZE bits.  */
2057
2058 static void
2059 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2060                                      unsigned int size)
2061 {
2062   tree type = lang_hooks.types.type_for_size (size, true);
2063   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2064                  true, NULL);
2065 }
2066
2067 /* Adds standard iv candidates.  */
2068
2069 static void
2070 add_standard_iv_candidates (struct ivopts_data *data)
2071 {
2072   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2073
2074   /* The same for a double-integer type if it is still fast enough.  */
2075   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2076     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2077 }
2078
2079
2080 /* Adds candidates bases on the old induction variable IV.  */
2081
2082 static void
2083 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2084 {
2085   tree phi, def;
2086   struct iv_cand *cand;
2087
2088   add_candidate (data, iv->base, iv->step, true, NULL);
2089
2090   /* The same, but with initial value zero.  */
2091   add_candidate (data,
2092                  build_int_cst (TREE_TYPE (iv->base), 0),
2093                  iv->step, true, NULL);
2094
2095   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2096   if (TREE_CODE (phi) == PHI_NODE)
2097     {
2098       /* Additionally record the possibility of leaving the original iv
2099          untouched.  */
2100       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2101       cand = add_candidate_1 (data,
2102                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2103                               SSA_NAME_DEF_STMT (def));
2104       cand->var_before = iv->ssa_name;
2105       cand->var_after = def;
2106     }
2107 }
2108
2109 /* Adds candidates based on the old induction variables.  */
2110
2111 static void
2112 add_old_ivs_candidates (struct ivopts_data *data)
2113 {
2114   unsigned i;
2115   struct iv *iv;
2116   bitmap_iterator bi;
2117
2118   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2119     {
2120       iv = ver_info (data, i)->iv;
2121       if (iv && iv->biv_p && !integer_zerop (iv->step))
2122         add_old_iv_candidates (data, iv);
2123     }
2124 }
2125
2126 /* Adds candidates based on the value of the induction variable IV and USE.  */
2127
2128 static void
2129 add_iv_value_candidates (struct ivopts_data *data,
2130                          struct iv *iv, struct iv_use *use)
2131 {
2132   unsigned HOST_WIDE_INT offset;
2133   tree base;
2134
2135   add_candidate (data, iv->base, iv->step, false, use);
2136
2137   /* The same, but with initial value zero.  Make such variable important,
2138      since it is generic enough so that possibly many uses may be based
2139      on it.  */
2140   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2141                  iv->step, true, use);
2142
2143   /* Third, try removing the constant offset.  */
2144   base = strip_offset (iv->base, &offset);
2145   if (offset)
2146     add_candidate (data, base, iv->step, false, use);
2147 }
2148
2149 /* Adds candidates based on the uses.  */
2150
2151 static void
2152 add_derived_ivs_candidates (struct ivopts_data *data)
2153 {
2154   unsigned i;
2155
2156   for (i = 0; i < n_iv_uses (data); i++)
2157     {
2158       struct iv_use *use = iv_use (data, i);
2159
2160       if (!use)
2161         continue;
2162
2163       switch (use->type)
2164         {
2165         case USE_NONLINEAR_EXPR:
2166         case USE_COMPARE:
2167         case USE_ADDRESS:
2168           /* Just add the ivs based on the value of the iv used here.  */
2169           add_iv_value_candidates (data, use->iv, use);
2170           break;
2171
2172         default:
2173           gcc_unreachable ();
2174         }
2175     }
2176 }
2177
2178 /* Record important candidates and add them to related_cands bitmaps
2179    if needed.  */
2180
2181 static void
2182 record_important_candidates (struct ivopts_data *data)
2183 {
2184   unsigned i;
2185   struct iv_use *use;
2186
2187   for (i = 0; i < n_iv_cands (data); i++)
2188     {
2189       struct iv_cand *cand = iv_cand (data, i);
2190
2191       if (cand->important)
2192         bitmap_set_bit (data->important_candidates, i);
2193     }
2194
2195   data->consider_all_candidates = (n_iv_cands (data)
2196                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2197
2198   if (data->consider_all_candidates)
2199     {
2200       /* We will not need "related_cands" bitmaps in this case,
2201          so release them to decrease peak memory consumption.  */
2202       for (i = 0; i < n_iv_uses (data); i++)
2203         {
2204           use = iv_use (data, i);
2205           BITMAP_FREE (use->related_cands);
2206         }
2207     }
2208   else
2209     {
2210       /* Add important candidates to the related_cands bitmaps.  */
2211       for (i = 0; i < n_iv_uses (data); i++)
2212         bitmap_ior_into (iv_use (data, i)->related_cands,
2213                          data->important_candidates);
2214     }
2215 }
2216
2217 /* Finds the candidates for the induction variables.  */
2218
2219 static void
2220 find_iv_candidates (struct ivopts_data *data)
2221 {
2222   /* Add commonly used ivs.  */
2223   add_standard_iv_candidates (data);
2224
2225   /* Add old induction variables.  */
2226   add_old_ivs_candidates (data);
2227
2228   /* Add induction variables derived from uses.  */
2229   add_derived_ivs_candidates (data);
2230
2231   /* Record the important candidates.  */
2232   record_important_candidates (data);
2233 }
2234
2235 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2236    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2237    we allocate a simple list to every use.  */
2238
2239 static void
2240 alloc_use_cost_map (struct ivopts_data *data)
2241 {
2242   unsigned i, size, s, j;
2243
2244   for (i = 0; i < n_iv_uses (data); i++)
2245     {
2246       struct iv_use *use = iv_use (data, i);
2247       bitmap_iterator bi;
2248
2249       if (data->consider_all_candidates)
2250         size = n_iv_cands (data);
2251       else
2252         {
2253           s = 0;
2254           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2255             {
2256               s++;
2257             }
2258
2259           /* Round up to the power of two, so that moduling by it is fast.  */
2260           for (size = 1; size < s; size <<= 1)
2261             continue;
2262         }
2263
2264       use->n_map_members = size;
2265       use->cost_map = XCNEWVEC (struct cost_pair, size);
2266     }
2267 }
2268
2269 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2270    on invariants DEPENDS_ON and that the value used in expressing it
2271    is VALUE.*/
2272
2273 static void
2274 set_use_iv_cost (struct ivopts_data *data,
2275                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2276                  bitmap depends_on, tree value)
2277 {
2278   unsigned i, s;
2279
2280   if (cost == INFTY)
2281     {
2282       BITMAP_FREE (depends_on);
2283       return;
2284     }
2285
2286   if (data->consider_all_candidates)
2287     {
2288       use->cost_map[cand->id].cand = cand;
2289       use->cost_map[cand->id].cost = cost;
2290       use->cost_map[cand->id].depends_on = depends_on;
2291       use->cost_map[cand->id].value = value;
2292       return;
2293     }
2294
2295   /* n_map_members is a power of two, so this computes modulo.  */
2296   s = cand->id & (use->n_map_members - 1);
2297   for (i = s; i < use->n_map_members; i++)
2298     if (!use->cost_map[i].cand)
2299       goto found;
2300   for (i = 0; i < s; i++)
2301     if (!use->cost_map[i].cand)
2302       goto found;
2303
2304   gcc_unreachable ();
2305
2306 found:
2307   use->cost_map[i].cand = cand;
2308   use->cost_map[i].cost = cost;
2309   use->cost_map[i].depends_on = depends_on;
2310   use->cost_map[i].value = value;
2311 }
2312
2313 /* Gets cost of (USE, CANDIDATE) pair.  */
2314
2315 static struct cost_pair *
2316 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2317                  struct iv_cand *cand)
2318 {
2319   unsigned i, s;
2320   struct cost_pair *ret;
2321
2322   if (!cand)
2323     return NULL;
2324
2325   if (data->consider_all_candidates)
2326     {
2327       ret = use->cost_map + cand->id;
2328       if (!ret->cand)
2329         return NULL;
2330
2331       return ret;
2332     }
2333
2334   /* n_map_members is a power of two, so this computes modulo.  */
2335   s = cand->id & (use->n_map_members - 1);
2336   for (i = s; i < use->n_map_members; i++)
2337     if (use->cost_map[i].cand == cand)
2338       return use->cost_map + i;
2339
2340   for (i = 0; i < s; i++)
2341     if (use->cost_map[i].cand == cand)
2342       return use->cost_map + i;
2343
2344   return NULL;
2345 }
2346
2347 /* Returns estimate on cost of computing SEQ.  */
2348
2349 static unsigned
2350 seq_cost (rtx seq)
2351 {
2352   unsigned cost = 0;
2353   rtx set;
2354
2355   for (; seq; seq = NEXT_INSN (seq))
2356     {
2357       set = single_set (seq);
2358       if (set)
2359         cost += rtx_cost (set, SET);
2360       else
2361         cost++;
2362     }
2363
2364   return cost;
2365 }
2366
2367 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2368 static rtx
2369 produce_memory_decl_rtl (tree obj, int *regno)
2370 {
2371   rtx x;
2372
2373   gcc_assert (obj);
2374   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2375     {
2376       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2377       x = gen_rtx_SYMBOL_REF (Pmode, name);
2378       SET_SYMBOL_REF_DECL (x, obj);
2379       x = gen_rtx_MEM (DECL_MODE (obj), x);
2380       targetm.encode_section_info (obj, x, true);
2381     }
2382   else
2383     {
2384       x = gen_raw_REG (Pmode, (*regno)++);
2385       x = gen_rtx_MEM (DECL_MODE (obj), x);
2386     }
2387
2388   return x;
2389 }
2390
2391 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2392    walk_tree.  DATA contains the actual fake register number.  */
2393
2394 static tree
2395 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2396 {
2397   tree obj = NULL_TREE;
2398   rtx x = NULL_RTX;
2399   int *regno = (int *) data;
2400
2401   switch (TREE_CODE (*expr_p))
2402     {
2403     case ADDR_EXPR:
2404       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2405            handled_component_p (*expr_p);
2406            expr_p = &TREE_OPERAND (*expr_p, 0))
2407         continue;
2408       obj = *expr_p;
2409       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2410         x = produce_memory_decl_rtl (obj, regno);
2411       break;
2412
2413     case SSA_NAME:
2414       *ws = 0;
2415       obj = SSA_NAME_VAR (*expr_p);
2416       if (!DECL_RTL_SET_P (obj))
2417         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2418       break;
2419
2420     case VAR_DECL:
2421     case PARM_DECL:
2422     case RESULT_DECL:
2423       *ws = 0;
2424       obj = *expr_p;
2425
2426       if (DECL_RTL_SET_P (obj))
2427         break;
2428
2429       if (DECL_MODE (obj) == BLKmode)
2430         x = produce_memory_decl_rtl (obj, regno);
2431       else
2432         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2433
2434       break;
2435
2436     default:
2437       break;
2438     }
2439
2440   if (x)
2441     {
2442       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2443       SET_DECL_RTL (obj, x);
2444     }
2445
2446   return NULL_TREE;
2447 }
2448
2449 /* Determines cost of the computation of EXPR.  */
2450
2451 static unsigned
2452 computation_cost (tree expr)
2453 {
2454   rtx seq, rslt;
2455   tree type = TREE_TYPE (expr);
2456   unsigned cost;
2457   /* Avoid using hard regs in ways which may be unsupported.  */
2458   int regno = LAST_VIRTUAL_REGISTER + 1;
2459
2460   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2461   start_sequence ();
2462   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2463   seq = get_insns ();
2464   end_sequence ();
2465
2466   cost = seq_cost (seq);
2467   if (MEM_P (rslt))
2468     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2469
2470   return cost;
2471 }
2472
2473 /* Returns variable containing the value of candidate CAND at statement AT.  */
2474
2475 static tree
2476 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2477 {
2478   if (stmt_after_increment (loop, cand, stmt))
2479     return cand->var_after;
2480   else
2481     return cand->var_before;
2482 }
2483
2484 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2485    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2486
2487 int
2488 tree_int_cst_sign_bit (const_tree t)
2489 {
2490   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2491   unsigned HOST_WIDE_INT w;
2492
2493   if (bitno < HOST_BITS_PER_WIDE_INT)
2494     w = TREE_INT_CST_LOW (t);
2495   else
2496     {
2497       w = TREE_INT_CST_HIGH (t);
2498       bitno -= HOST_BITS_PER_WIDE_INT;
2499     }
2500
2501   return (w >> bitno) & 1;
2502 }
2503
2504 /* If we can prove that TOP = cst * BOT for some constant cst,
2505    store cst to MUL and return true.  Otherwise return false.
2506    The returned value is always sign-extended, regardless of the
2507    signedness of TOP and BOT.  */
2508
2509 static bool
2510 constant_multiple_of (tree top, tree bot, double_int *mul)
2511 {
2512   tree mby;
2513   enum tree_code code;
2514   double_int res, p0, p1;
2515   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2516
2517   STRIP_NOPS (top);
2518   STRIP_NOPS (bot);
2519
2520   if (operand_equal_p (top, bot, 0))
2521     {
2522       *mul = double_int_one;
2523       return true;
2524     }
2525
2526   code = TREE_CODE (top);
2527   switch (code)
2528     {
2529     case MULT_EXPR:
2530       mby = TREE_OPERAND (top, 1);
2531       if (TREE_CODE (mby) != INTEGER_CST)
2532         return false;
2533
2534       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2535         return false;
2536
2537       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2538                               precision);
2539       return true;
2540
2541     case PLUS_EXPR:
2542     case MINUS_EXPR:
2543       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2544           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2545         return false;
2546
2547       if (code == MINUS_EXPR)
2548         p1 = double_int_neg (p1);
2549       *mul = double_int_sext (double_int_add (p0, p1), precision);
2550       return true;
2551
2552     case INTEGER_CST:
2553       if (TREE_CODE (bot) != INTEGER_CST)
2554         return false;
2555
2556       p0 = double_int_sext (tree_to_double_int (top), precision);
2557       p1 = double_int_sext (tree_to_double_int (bot), precision);
2558       if (double_int_zero_p (p1))
2559         return false;
2560       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2561                               precision);
2562       return double_int_zero_p (res);
2563
2564     default:
2565       return false;
2566     }
2567 }
2568
2569 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2570    same precision that is at least as wide as the precision of TYPE, stores
2571    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2572    type of A and B.  */
2573
2574 static tree
2575 determine_common_wider_type (tree *a, tree *b)
2576 {
2577   tree wider_type = NULL;
2578   tree suba, subb;
2579   tree atype = TREE_TYPE (*a);
2580
2581   if ((TREE_CODE (*a) == NOP_EXPR
2582        || TREE_CODE (*a) == CONVERT_EXPR))
2583     {
2584       suba = TREE_OPERAND (*a, 0);
2585       wider_type = TREE_TYPE (suba);
2586       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2587         return atype;
2588     }
2589   else
2590     return atype;
2591
2592   if ((TREE_CODE (*b) == NOP_EXPR
2593        || TREE_CODE (*b) == CONVERT_EXPR))
2594     {
2595       subb = TREE_OPERAND (*b, 0);
2596       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2597         return atype;
2598     }
2599   else
2600     return atype;
2601
2602   *a = suba;
2603   *b = subb;
2604   return wider_type;
2605 }
2606
2607 /* Determines the expression by that USE is expressed from induction variable
2608    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2609    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2610
2611 static bool
2612 get_computation_aff (struct loop *loop,
2613                      struct iv_use *use, struct iv_cand *cand, tree at,
2614                      struct affine_tree_combination *aff)
2615 {
2616   tree ubase = use->iv->base;
2617   tree ustep = use->iv->step;
2618   tree cbase = cand->iv->base;
2619   tree cstep = cand->iv->step, cstep_common;
2620   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2621   tree common_type, var;
2622   tree uutype;
2623   aff_tree cbase_aff, var_aff;
2624   double_int rat;
2625
2626   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2627     {
2628       /* We do not have a precision to express the values of use.  */
2629       return false;
2630     }
2631
2632   var = var_at_stmt (loop, cand, at);
2633   uutype = unsigned_type_for (utype);
2634
2635   /* If the conversion is not noop, perform it.  */
2636   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2637     {
2638       cstep = fold_convert (uutype, cstep);
2639       cbase = fold_convert (uutype, cbase);
2640       var = fold_convert (uutype, var);
2641     }
2642
2643   if (!constant_multiple_of (ustep, cstep, &rat))
2644     return false;
2645
2646   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2647      type, we achieve better folding by computing their difference in this
2648      wider type, and cast the result to UUTYPE.  We do not need to worry about
2649      overflows, as all the arithmetics will in the end be performed in UUTYPE
2650      anyway.  */
2651   common_type = determine_common_wider_type (&ubase, &cbase);
2652
2653   /* use = ubase - ratio * cbase + ratio * var.  */
2654   tree_to_aff_combination (ubase, common_type, aff);
2655   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2656   tree_to_aff_combination (var, uutype, &var_aff);
2657
2658   /* We need to shift the value if we are after the increment.  */
2659   if (stmt_after_increment (loop, cand, at))
2660     {
2661       aff_tree cstep_aff;
2662
2663       if (common_type != uutype)
2664         cstep_common = fold_convert (common_type, cstep);
2665       else
2666         cstep_common = cstep;
2667
2668       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2669       aff_combination_add (&cbase_aff, &cstep_aff);
2670     }
2671
2672   aff_combination_scale (&cbase_aff, double_int_neg (rat));
2673   aff_combination_add (aff, &cbase_aff);
2674   if (common_type != uutype)
2675     aff_combination_convert (aff, uutype);
2676
2677   aff_combination_scale (&var_aff, rat);
2678   aff_combination_add (aff, &var_aff);
2679
2680   return true;
2681 }
2682
2683 /* Determines the expression by that USE is expressed from induction variable
2684    CAND at statement AT in LOOP.  The computation is unshared.  */
2685
2686 static tree
2687 get_computation_at (struct loop *loop,
2688                     struct iv_use *use, struct iv_cand *cand, tree at)
2689 {
2690   aff_tree aff;
2691   tree type = TREE_TYPE (use->iv->base);
2692
2693   if (!get_computation_aff (loop, use, cand, at, &aff))
2694     return NULL_TREE;
2695   unshare_aff_combination (&aff);
2696   return fold_convert (type, aff_combination_to_tree (&aff));
2697 }
2698
2699 /* Determines the expression by that USE is expressed from induction variable
2700    CAND in LOOP.  The computation is unshared.  */
2701
2702 static tree
2703 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2704 {
2705   return get_computation_at (loop, use, cand, use->stmt);
2706 }
2707
2708 /* Returns cost of addition in MODE.  */
2709
2710 static unsigned
2711 add_cost (enum machine_mode mode)
2712 {
2713   static unsigned costs[NUM_MACHINE_MODES];
2714   rtx seq;
2715   unsigned cost;
2716
2717   if (costs[mode])
2718     return costs[mode];
2719
2720   start_sequence ();
2721   force_operand (gen_rtx_fmt_ee (PLUS, mode,
2722                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2723                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2724                  NULL_RTX);
2725   seq = get_insns ();
2726   end_sequence ();
2727
2728   cost = seq_cost (seq);
2729   if (!cost)
2730     cost = 1;
2731
2732   costs[mode] = cost;
2733
2734   if (dump_file && (dump_flags & TDF_DETAILS))
2735     fprintf (dump_file, "Addition in %s costs %d\n",
2736              GET_MODE_NAME (mode), cost);
2737   return cost;
2738 }
2739
2740 /* Entry in a hashtable of already known costs for multiplication.  */
2741 struct mbc_entry
2742 {
2743   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
2744   enum machine_mode mode;       /* In mode.  */
2745   unsigned cost;                /* The cost.  */
2746 };
2747
2748 /* Counts hash value for the ENTRY.  */
2749
2750 static hashval_t
2751 mbc_entry_hash (const void *entry)
2752 {
2753   const struct mbc_entry *e = (const struct mbc_entry *) entry;
2754
2755   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2756 }
2757
2758 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2759
2760 static int
2761 mbc_entry_eq (const void *entry1, const void *entry2)
2762 {
2763   const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2764   const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2765
2766   return (e1->mode == e2->mode
2767           && e1->cst == e2->cst);
2768 }
2769
2770 /* Returns cost of multiplication by constant CST in MODE.  */
2771
2772 unsigned
2773 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2774 {
2775   static htab_t costs;
2776   struct mbc_entry **cached, act;
2777   rtx seq;
2778   unsigned cost;
2779
2780   if (!costs)
2781     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2782
2783   act.mode = mode;
2784   act.cst = cst;
2785   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2786   if (*cached)
2787     return (*cached)->cost;
2788
2789   *cached = XNEW (struct mbc_entry);
2790   (*cached)->mode = mode;
2791   (*cached)->cst = cst;
2792
2793   start_sequence ();
2794   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2795                gen_int_mode (cst, mode), NULL_RTX, 0);
2796   seq = get_insns ();
2797   end_sequence ();
2798
2799   cost = seq_cost (seq);
2800
2801   if (dump_file && (dump_flags & TDF_DETAILS))
2802     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2803              (int) cst, GET_MODE_NAME (mode), cost);
2804
2805   (*cached)->cost = cost;
2806
2807   return cost;
2808 }
2809
2810 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
2811    validity for a memory reference accessing memory of mode MODE.  */
2812
2813 bool
2814 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2815 {
2816 #define MAX_RATIO 128
2817   static sbitmap valid_mult[MAX_MACHINE_MODE];
2818
2819   if (!valid_mult[mode])
2820     {
2821       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2822       rtx addr;
2823       HOST_WIDE_INT i;
2824
2825       valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2826       sbitmap_zero (valid_mult[mode]);
2827       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2828       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2829         {
2830           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2831           if (memory_address_p (mode, addr))
2832             SET_BIT (valid_mult[mode], i + MAX_RATIO);
2833         }
2834
2835       if (dump_file && (dump_flags & TDF_DETAILS))
2836         {
2837           fprintf (dump_file, "  allowed multipliers:");
2838           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2839             if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2840               fprintf (dump_file, " %d", (int) i);
2841           fprintf (dump_file, "\n");
2842           fprintf (dump_file, "\n");
2843         }
2844     }
2845
2846   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2847     return false;
2848
2849   return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2850 }
2851
2852 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2853    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
2854    variable is omitted.  Compute the cost for a memory reference that accesses
2855    a memory location of mode MEM_MODE.
2856
2857    TODO -- there must be some better way.  This all is quite crude.  */
2858
2859 static unsigned
2860 get_address_cost (bool symbol_present, bool var_present,
2861                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2862                   enum machine_mode mem_mode)
2863 {
2864   static bool initialized[MAX_MACHINE_MODE];
2865   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2866   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2867   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2868   unsigned cost, acost;
2869   bool offset_p, ratio_p;
2870   HOST_WIDE_INT s_offset;
2871   unsigned HOST_WIDE_INT mask;
2872   unsigned bits;
2873
2874   if (!initialized[mem_mode])
2875     {
2876       HOST_WIDE_INT i;
2877       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2878       int old_cse_not_expected;
2879       unsigned sym_p, var_p, off_p, rat_p, add_c;
2880       rtx seq, addr, base;
2881       rtx reg0, reg1;
2882
2883       initialized[mem_mode] = true;
2884
2885       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2886
2887       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2888       for (i = start; i <= 1 << 20; i <<= 1)
2889         {
2890           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2891           if (!memory_address_p (mem_mode, addr))
2892             break;
2893         }
2894       max_offset[mem_mode] = i == start ? 0 : i >> 1;
2895       off[mem_mode] = max_offset[mem_mode];
2896
2897       for (i = start; i <= 1 << 20; i <<= 1)
2898         {
2899           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2900           if (!memory_address_p (mem_mode, addr))
2901             break;
2902         }
2903       min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2904
2905       if (dump_file && (dump_flags & TDF_DETAILS))
2906         {
2907           fprintf (dump_file, "get_address_cost:\n");
2908           fprintf (dump_file, "  min offset %s %d\n",
2909                    GET_MODE_NAME (mem_mode),
2910                    (int) min_offset[mem_mode]);
2911           fprintf (dump_file, "  max offset %s %d\n",
2912                    GET_MODE_NAME (mem_mode),
2913                    (int) max_offset[mem_mode]);
2914         }
2915
2916       rat[mem_mode] = 1;
2917       for (i = 2; i <= MAX_RATIO; i++)
2918         if (multiplier_allowed_in_address_p (i, mem_mode))
2919           {
2920             rat[mem_mode] = i;
2921             break;
2922           }
2923
2924       /* Compute the cost of various addressing modes.  */
2925       acost = 0;
2926       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2927       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
2928
2929       for (i = 0; i < 16; i++)
2930         {
2931           sym_p = i & 1;
2932           var_p = (i >> 1) & 1;
2933           off_p = (i >> 2) & 1;
2934           rat_p = (i >> 3) & 1;
2935
2936           addr = reg0;
2937           if (rat_p)
2938             addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2939                                    gen_int_mode (rat[mem_mode], Pmode));
2940
2941           if (var_p)
2942             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2943
2944           if (sym_p)
2945             {
2946               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2947               /* ??? We can run into trouble with some backends by presenting
2948                  it with symbols which havn't been properly passed through
2949                  targetm.encode_section_info.  By setting the local bit, we
2950                  enhance the probability of things working.  */
2951               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
2952
2953               if (off_p)
2954                 base = gen_rtx_fmt_e (CONST, Pmode,
2955                                       gen_rtx_fmt_ee (PLUS, Pmode,
2956                                                       base,
2957                                                       gen_int_mode (off[mem_mode],
2958                                                                     Pmode)));
2959             }
2960           else if (off_p)
2961             base = gen_int_mode (off[mem_mode], Pmode);
2962           else
2963             base = NULL_RTX;
2964
2965           if (base)
2966             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2967
2968           start_sequence ();
2969           /* To avoid splitting addressing modes, pretend that no cse will
2970              follow.  */
2971           old_cse_not_expected = cse_not_expected;
2972           cse_not_expected = true;
2973           addr = memory_address (mem_mode, addr);
2974           cse_not_expected = old_cse_not_expected;
2975           seq = get_insns ();
2976           end_sequence ();
2977
2978           acost = seq_cost (seq);
2979           acost += address_cost (addr, mem_mode);
2980
2981           if (!acost)
2982             acost = 1;
2983           costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
2984         }
2985
2986       /* On some targets, it is quite expensive to load symbol to a register,
2987          which makes addresses that contain symbols look much more expensive.
2988          However, the symbol will have to be loaded in any case before the
2989          loop (and quite likely we have it in register already), so it does not
2990          make much sense to penalize them too heavily.  So make some final
2991          tweaks for the SYMBOL_PRESENT modes:
2992
2993          If VAR_PRESENT is false, and the mode obtained by changing symbol to
2994          var is cheaper, use this mode with small penalty.
2995          If VAR_PRESENT is true, try whether the mode with
2996          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
2997          if this is the case, use it.  */
2998       add_c = add_cost (Pmode);
2999       for (i = 0; i < 8; i++)
3000         {
3001           var_p = i & 1;
3002           off_p = (i >> 1) & 1;
3003           rat_p = (i >> 2) & 1;
3004
3005           acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3006           if (var_p)
3007             acost += add_c;
3008
3009           if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3010             costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3011         }
3012
3013       if (dump_file && (dump_flags & TDF_DETAILS))
3014         {
3015           fprintf (dump_file, "Address costs:\n");
3016
3017           for (i = 0; i < 16; i++)
3018             {
3019               sym_p = i & 1;
3020               var_p = (i >> 1) & 1;
3021               off_p = (i >> 2) & 1;
3022               rat_p = (i >> 3) & 1;
3023
3024               fprintf (dump_file, "  ");
3025               if (sym_p)
3026                 fprintf (dump_file, "sym + ");
3027               if (var_p)
3028                 fprintf (dump_file, "var + ");
3029               if (off_p)
3030                 fprintf (dump_file, "cst + ");
3031               if (rat_p)
3032                 fprintf (dump_file, "rat * ");
3033
3034               acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3035               fprintf (dump_file, "index costs %d\n", acost);
3036             }
3037           fprintf (dump_file, "\n");
3038         }
3039     }
3040
3041   bits = GET_MODE_BITSIZE (Pmode);
3042   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3043   offset &= mask;
3044   if ((offset >> (bits - 1) & 1))
3045     offset |= ~mask;
3046   s_offset = offset;
3047
3048   cost = 0;
3049   offset_p = (s_offset != 0
3050               && min_offset[mem_mode] <= s_offset
3051               && s_offset <= max_offset[mem_mode]);
3052   ratio_p = (ratio != 1
3053              && multiplier_allowed_in_address_p (ratio, mem_mode));
3054
3055   if (ratio != 1 && !ratio_p)
3056     cost += multiply_by_cost (ratio, Pmode);
3057
3058   if (s_offset && !offset_p && !symbol_present)
3059     cost += add_cost (Pmode);
3060
3061   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3062   return cost + acost;
3063 }
3064
3065 /* Estimates cost of forcing expression EXPR into a variable.  */
3066
3067 unsigned
3068 force_expr_to_var_cost (tree expr)
3069 {
3070   static bool costs_initialized = false;
3071   static unsigned integer_cost;
3072   static unsigned symbol_cost;
3073   static unsigned address_cost;
3074   tree op0, op1;
3075   unsigned cost0, cost1, cost;
3076   enum machine_mode mode;
3077
3078   if (!costs_initialized)
3079     {
3080       tree type = build_pointer_type (integer_type_node);
3081       tree var, addr;
3082       rtx x;
3083
3084       var = create_tmp_var_raw (integer_type_node, "test_var");
3085       TREE_STATIC (var) = 1;
3086       x = produce_memory_decl_rtl (var, NULL);
3087       SET_DECL_RTL (var, x);
3088
3089       integer_cost = computation_cost (build_int_cst (integer_type_node,
3090                                                       2000));
3091
3092       addr = build1 (ADDR_EXPR, type, var);
3093       symbol_cost = computation_cost (addr) + 1;
3094
3095       address_cost
3096         = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3097                                     addr,
3098                                     build_int_cst (sizetype, 2000))) + 1;
3099       if (dump_file && (dump_flags & TDF_DETAILS))
3100         {
3101           fprintf (dump_file, "force_expr_to_var_cost:\n");
3102           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3103           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3104           fprintf (dump_file, "  address %d\n", (int) address_cost);
3105           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3106           fprintf (dump_file, "\n");
3107         }
3108
3109       costs_initialized = true;
3110     }
3111
3112   STRIP_NOPS (expr);
3113
3114   if (SSA_VAR_P (expr))
3115     return 0;
3116
3117   if (TREE_INVARIANT (expr))
3118     {
3119       if (TREE_CODE (expr) == INTEGER_CST)
3120         return integer_cost;
3121
3122       if (TREE_CODE (expr) == ADDR_EXPR)
3123         {
3124           tree obj = TREE_OPERAND (expr, 0);
3125
3126           if (TREE_CODE (obj) == VAR_DECL
3127               || TREE_CODE (obj) == PARM_DECL
3128               || TREE_CODE (obj) == RESULT_DECL)
3129             return symbol_cost;
3130         }
3131
3132       return address_cost;
3133     }
3134
3135   switch (TREE_CODE (expr))
3136     {
3137     case POINTER_PLUS_EXPR:
3138     case PLUS_EXPR:
3139     case MINUS_EXPR:
3140     case MULT_EXPR:
3141       op0 = TREE_OPERAND (expr, 0);
3142       op1 = TREE_OPERAND (expr, 1);
3143       STRIP_NOPS (op0);
3144       STRIP_NOPS (op1);
3145
3146       if (is_gimple_val (op0))
3147         cost0 = 0;
3148       else
3149         cost0 = force_expr_to_var_cost (op0);
3150
3151       if (is_gimple_val (op1))
3152         cost1 = 0;
3153       else
3154         cost1 = force_expr_to_var_cost (op1);
3155
3156       break;
3157
3158     default:
3159       /* Just an arbitrary value, FIXME.  */
3160       return target_spill_cost;
3161     }
3162
3163   mode = TYPE_MODE (TREE_TYPE (expr));
3164   switch (TREE_CODE (expr))
3165     {
3166     case POINTER_PLUS_EXPR:
3167     case PLUS_EXPR:
3168     case MINUS_EXPR:
3169       cost = add_cost (mode);
3170       break;
3171
3172     case MULT_EXPR:
3173       if (cst_and_fits_in_hwi (op0))
3174         cost = multiply_by_cost (int_cst_value (op0), mode);
3175       else if (cst_and_fits_in_hwi (op1))
3176         cost = multiply_by_cost (int_cst_value (op1), mode);
3177       else
3178         return target_spill_cost;
3179       break;
3180
3181     default:
3182       gcc_unreachable ();
3183     }
3184
3185   cost += cost0;
3186   cost += cost1;
3187
3188   /* Bound the cost by target_spill_cost.  The parts of complicated
3189      computations often are either loop invariant or at least can
3190      be shared between several iv uses, so letting this grow without
3191      limits would not give reasonable results.  */
3192   return cost < target_spill_cost ? cost : target_spill_cost;
3193 }
3194
3195 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3196    invariants the computation depends on.  */
3197
3198 static unsigned
3199 force_var_cost (struct ivopts_data *data,
3200                 tree expr, bitmap *depends_on)
3201 {
3202   if (depends_on)
3203     {
3204       fd_ivopts_data = data;
3205       walk_tree (&expr, find_depends, depends_on, NULL);
3206     }
3207
3208   return force_expr_to_var_cost (expr);
3209 }
3210
3211 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3212    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3213    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3214    invariants the computation depends on.  */
3215
3216 static unsigned
3217 split_address_cost (struct ivopts_data *data,
3218                     tree addr, bool *symbol_present, bool *var_present,
3219                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3220 {
3221   tree core;
3222   HOST_WIDE_INT bitsize;
3223   HOST_WIDE_INT bitpos;
3224   tree toffset;
3225   enum machine_mode mode;
3226   int unsignedp, volatilep;
3227
3228   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3229                               &unsignedp, &volatilep, false);
3230
3231   if (toffset != 0
3232       || bitpos % BITS_PER_UNIT != 0
3233       || TREE_CODE (core) != VAR_DECL)
3234     {
3235       *symbol_present = false;
3236       *var_present = true;
3237       fd_ivopts_data = data;
3238       walk_tree (&addr, find_depends, depends_on, NULL);
3239       return target_spill_cost;
3240     }
3241
3242   *offset += bitpos / BITS_PER_UNIT;
3243   if (TREE_STATIC (core)
3244       || DECL_EXTERNAL (core))
3245     {
3246       *symbol_present = true;
3247       *var_present = false;
3248       return 0;
3249     }
3250
3251   *symbol_present = false;
3252   *var_present = true;
3253   return 0;
3254 }
3255
3256 /* Estimates cost of expressing difference of addresses E1 - E2 as
3257    var + symbol + offset.  The value of offset is added to OFFSET,
3258    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3259    part is missing.  DEPENDS_ON is a set of the invariants the computation
3260    depends on.  */
3261
3262 static unsigned
3263 ptr_difference_cost (struct ivopts_data *data,
3264                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3265                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3266 {
3267   HOST_WIDE_INT diff = 0;
3268   unsigned cost;
3269
3270   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3271
3272   if (ptr_difference_const (e1, e2, &diff))
3273     {
3274       *offset += diff;
3275       *symbol_present = false;
3276       *var_present = false;
3277       return 0;
3278     }
3279
3280   if (e2 == integer_zero_node)
3281     return split_address_cost (data, TREE_OPERAND (e1, 0),
3282                                symbol_present, var_present, offset, depends_on);
3283
3284   *symbol_present = false;
3285   *var_present = true;
3286
3287   cost = force_var_cost (data, e1, depends_on);
3288   cost += force_var_cost (data, e2, depends_on);
3289   cost += add_cost (Pmode);
3290
3291   return cost;
3292 }
3293
3294 /* Estimates cost of expressing difference E1 - E2 as
3295    var + symbol + offset.  The value of offset is added to OFFSET,
3296    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3297    part is missing.  DEPENDS_ON is a set of the invariants the computation
3298    depends on.  */
3299
3300 static unsigned
3301 difference_cost (struct ivopts_data *data,
3302                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3303                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3304 {
3305   unsigned cost;
3306   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3307   unsigned HOST_WIDE_INT off1, off2;
3308
3309   e1 = strip_offset (e1, &off1);
3310   e2 = strip_offset (e2, &off2);
3311   *offset += off1 - off2;
3312
3313   STRIP_NOPS (e1);
3314   STRIP_NOPS (e2);
3315
3316   if (TREE_CODE (e1) == ADDR_EXPR)
3317     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3318                                 depends_on);
3319   *symbol_present = false;
3320
3321   if (operand_equal_p (e1, e2, 0))
3322     {
3323       *var_present = false;
3324       return 0;
3325     }
3326   *var_present = true;
3327   if (integer_zerop (e2))
3328     return force_var_cost (data, e1, depends_on);
3329
3330   if (integer_zerop (e1))
3331     {
3332       cost = force_var_cost (data, e2, depends_on);
3333       cost += multiply_by_cost (-1, mode);
3334
3335       return cost;
3336     }
3337
3338   cost = force_var_cost (data, e1, depends_on);
3339   cost += force_var_cost (data, e2, depends_on);
3340   cost += add_cost (mode);
3341
3342   return cost;
3343 }
3344
3345 /* Determines the cost of the computation by that USE is expressed
3346    from induction variable CAND.  If ADDRESS_P is true, we just need
3347    to create an address from it, otherwise we want to get it into
3348    register.  A set of invariants we depend on is stored in
3349    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3350
3351 static unsigned
3352 get_computation_cost_at (struct ivopts_data *data,
3353                          struct iv_use *use, struct iv_cand *cand,
3354                          bool address_p, bitmap *depends_on, tree at)
3355 {
3356   tree ubase = use->iv->base, ustep = use->iv->step;
3357   tree cbase, cstep;
3358   tree utype = TREE_TYPE (ubase), ctype;
3359   unsigned HOST_WIDE_INT cstepi, offset = 0;
3360   HOST_WIDE_INT ratio, aratio;
3361   bool var_present, symbol_present;
3362   unsigned cost = 0, n_sums;
3363   double_int rat;
3364
3365   *depends_on = NULL;
3366
3367   /* Only consider real candidates.  */
3368   if (!cand->iv)
3369     return INFTY;
3370
3371   cbase = cand->iv->base;
3372   cstep = cand->iv->step;
3373   ctype = TREE_TYPE (cbase);
3374
3375   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3376     {
3377       /* We do not have a precision to express the values of use.  */
3378       return INFTY;
3379     }
3380
3381   if (address_p)
3382     {
3383       /* Do not try to express address of an object with computation based
3384          on address of a different object.  This may cause problems in rtl
3385          level alias analysis (that does not expect this to be happening,
3386          as this is illegal in C), and would be unlikely to be useful
3387          anyway.  */
3388       if (use->iv->base_object
3389           && cand->iv->base_object
3390           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3391         return INFTY;
3392     }
3393
3394   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3395     {
3396       /* TODO -- add direct handling of this case.  */
3397       goto fallback;
3398     }
3399
3400   /* CSTEPI is removed from the offset in case statement is after the
3401      increment.  If the step is not constant, we use zero instead.
3402      This is a bit imprecise (there is the extra addition), but
3403      redundancy elimination is likely to transform the code so that
3404      it uses value of the variable before increment anyway,
3405      so it is not that much unrealistic.  */
3406   if (cst_and_fits_in_hwi (cstep))
3407     cstepi = int_cst_value (cstep);
3408   else
3409     cstepi = 0;
3410
3411   if (!constant_multiple_of (ustep, cstep, &rat))
3412     return INFTY;
3413
3414   if (double_int_fits_in_shwi_p (rat))
3415     ratio = double_int_to_shwi (rat);
3416   else
3417     return INFTY;
3418
3419   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3420      or ratio == 1, it is better to handle this like
3421
3422      ubase - ratio * cbase + ratio * var
3423
3424      (also holds in the case ratio == -1, TODO.  */
3425
3426   if (cst_and_fits_in_hwi (cbase))
3427     {
3428       offset = - ratio * int_cst_value (cbase);
3429       cost += difference_cost (data,
3430                                ubase, integer_zero_node,
3431                                &symbol_present, &var_present, &offset,
3432                                depends_on);
3433     }
3434   else if (ratio == 1)
3435     {
3436       cost += difference_cost (data,
3437                                ubase, cbase,
3438                                &symbol_present, &var_present, &offset,
3439                                depends_on);
3440     }
3441   else
3442     {
3443       cost += force_var_cost (data, cbase, depends_on);
3444       cost += add_cost (TYPE_MODE (ctype));
3445       cost += difference_cost (data,
3446                                ubase, integer_zero_node,
3447                                &symbol_present, &var_present, &offset,
3448                                depends_on);
3449     }
3450
3451   /* If we are after the increment, the value of the candidate is higher by
3452      one iteration.  */
3453   if (stmt_after_increment (data->current_loop, cand, at))
3454     offset -= ratio * cstepi;
3455
3456   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3457      (symbol/var/const parts may be omitted).  If we are looking for an address,
3458      find the cost of addressing this.  */
3459   if (address_p)
3460     return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3461                                     TYPE_MODE (TREE_TYPE (*use->op_p)));
3462
3463   /* Otherwise estimate the costs for computing the expression.  */
3464   aratio = ratio > 0 ? ratio : -ratio;
3465   if (!symbol_present && !var_present && !offset)
3466     {
3467       if (ratio != 1)
3468         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3469
3470       return cost;
3471     }
3472
3473   if (aratio != 1)
3474     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3475
3476   n_sums = 1;
3477   if (var_present
3478       /* Symbol + offset should be compile-time computable.  */
3479       && (symbol_present || offset))
3480     n_sums++;
3481
3482   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3483
3484 fallback:
3485   {
3486     /* Just get the expression, expand it and measure the cost.  */
3487     tree comp = get_computation_at (data->current_loop, use, cand, at);
3488
3489     if (!comp)
3490       return INFTY;
3491
3492     if (address_p)
3493       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3494
3495     return computation_cost (comp);
3496   }
3497 }
3498
3499 /* Determines the cost of the computation by that USE is expressed
3500    from induction variable CAND.  If ADDRESS_P is true, we just need
3501    to create an address from it, otherwise we want to get it into
3502    register.  A set of invariants we depend on is stored in
3503    DEPENDS_ON.  */
3504
3505 static unsigned
3506 get_computation_cost (struct ivopts_data *data,
3507                       struct iv_use *use, struct iv_cand *cand,
3508                       bool address_p, bitmap *depends_on)
3509 {
3510   return get_computation_cost_at (data,
3511                                   use, cand, address_p, depends_on, use->stmt);
3512 }
3513
3514 /* Determines cost of basing replacement of USE on CAND in a generic
3515    expression.  */
3516
3517 static bool
3518 determine_use_iv_cost_generic (struct ivopts_data *data,
3519                                struct iv_use *use, struct iv_cand *cand)
3520 {
3521   bitmap depends_on;
3522   unsigned cost;
3523
3524   /* The simple case first -- if we need to express value of the preserved
3525      original biv, the cost is 0.  This also prevents us from counting the
3526      cost of increment twice -- once at this use and once in the cost of
3527      the candidate.  */
3528   if (cand->pos == IP_ORIGINAL
3529       && cand->incremented_at == use->stmt)
3530     {
3531       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3532       return true;
3533     }
3534
3535   cost = get_computation_cost (data, use, cand, false, &depends_on);
3536   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3537
3538   return cost != INFTY;
3539 }
3540
3541 /* Determines cost of basing replacement of USE on CAND in an address.  */
3542
3543 static bool
3544 determine_use_iv_cost_address (struct ivopts_data *data,
3545                                struct iv_use *use, struct iv_cand *cand)
3546 {
3547   bitmap depends_on;
3548   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3549
3550   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3551
3552   return cost != INFTY;
3553 }
3554
3555 /* Computes value of candidate CAND at position AT in iteration NITER, and
3556    stores it to VAL.  */
3557
3558 static void
3559 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3560                aff_tree *val)
3561 {
3562   aff_tree step, delta, nit;
3563   struct iv *iv = cand->iv;
3564   tree type = TREE_TYPE (iv->base);
3565
3566   tree_to_aff_combination (iv->step, type, &step);
3567   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3568   aff_combination_convert (&nit, type);
3569   aff_combination_mult (&nit, &step, &delta);
3570   if (stmt_after_increment (loop, cand, at))
3571     aff_combination_add (&delta, &step);
3572
3573   tree_to_aff_combination (iv->base, type, val);
3574   aff_combination_add (val, &delta);
3575 }
3576
3577 /* Returns period of induction variable iv.  */
3578
3579 static tree
3580 iv_period (struct iv *iv)
3581 {
3582   tree step = iv->step, period, type;
3583   tree pow2div;
3584
3585   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3586
3587   /* Period of the iv is gcd (step, type range).  Since type range is power
3588      of two, it suffices to determine the maximum power of two that divides
3589      step.  */
3590   pow2div = num_ending_zeros (step);
3591   type = unsigned_type_for (TREE_TYPE (step));
3592
3593   period = build_low_bits_mask (type,
3594                                 (TYPE_PRECISION (type)
3595                                  - tree_low_cst (pow2div, 1)));
3596
3597   return period;
3598 }
3599
3600 /* Returns the comparison operator used when eliminating the iv USE.  */
3601
3602 static enum tree_code
3603 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3604 {
3605   struct loop *loop = data->current_loop;
3606   basic_block ex_bb;
3607   edge exit;
3608
3609   ex_bb = bb_for_stmt (use->stmt);
3610   exit = EDGE_SUCC (ex_bb, 0);
3611   if (flow_bb_inside_loop_p (loop, exit->dest))
3612     exit = EDGE_SUCC (ex_bb, 1);
3613
3614   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3615 }
3616
3617 /* Check whether it is possible to express the condition in USE by comparison
3618    of candidate CAND.  If so, store the value compared with to BOUND.  */
3619
3620 static bool
3621 may_eliminate_iv (struct ivopts_data *data,
3622                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3623 {
3624   basic_block ex_bb;
3625   edge exit;
3626   tree nit, period;
3627   struct loop *loop = data->current_loop;
3628   aff_tree bnd;
3629   double_int period_value, max_niter;
3630
3631   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3632     return false;
3633
3634   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3635      for other conditions inside loop body.  */
3636   ex_bb = bb_for_stmt (use->stmt);
3637   if (use->stmt != last_stmt (ex_bb)
3638       || TREE_CODE (use->stmt) != COND_EXPR)
3639     return false;
3640   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3641     return false;
3642
3643   exit = EDGE_SUCC (ex_bb, 0);
3644   if (flow_bb_inside_loop_p (loop, exit->dest))
3645     exit = EDGE_SUCC (ex_bb, 1);
3646   if (flow_bb_inside_loop_p (loop, exit->dest))
3647     return false;
3648
3649   nit = niter_for_exit (data, exit);
3650   if (!nit)
3651     return false;
3652
3653   /* Determine whether we may use the variable to test whether niter iterations
3654      elapsed.  This is the case iff the period of the induction variable is
3655      greater than the number of iterations.  */
3656   period = iv_period (cand->iv);
3657   if (!period)
3658     return false;
3659
3660   /* Compare the period with the estimate on the number of iterations of the
3661      loop.  */
3662   if (!estimated_loop_iterations (loop, true, &max_niter))
3663     return false;
3664   period_value = tree_to_double_int (period);
3665   if (double_int_ucmp (period_value, max_niter) <= 0)
3666     return false;
3667
3668   cand_value_at (loop, cand, use->stmt, nit, &bnd);
3669   *bound = aff_combination_to_tree (&bnd);
3670   return true;
3671 }
3672
3673 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3674
3675 static bool
3676 determine_use_iv_cost_condition (struct ivopts_data *data,
3677                                  struct iv_use *use, struct iv_cand *cand)
3678 {
3679   tree bound = NULL_TREE;
3680   struct iv *cmp_iv;
3681   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3682   unsigned elim_cost, express_cost, cost;
3683   bool ok;
3684
3685   /* Only consider real candidates.  */
3686   if (!cand->iv)
3687     {
3688       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3689       return false;
3690     }
3691
3692   /* Try iv elimination.  */
3693   if (may_eliminate_iv (data, use, cand, &bound))
3694     {
3695       elim_cost = force_var_cost (data, bound, &depends_on_elim);
3696       /* The bound is a loop invariant, so it will be only computed
3697          once.  */
3698       elim_cost /= AVG_LOOP_NITER (data->current_loop);
3699     }
3700   else
3701     elim_cost = INFTY;
3702
3703   /* Try expressing the original giv.  If it is compared with an invariant,
3704      note that we cannot get rid of it.  */
3705   ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3706   gcc_assert (ok);
3707
3708   express_cost = get_computation_cost (data, use, cand, false,
3709                                        &depends_on_express);
3710   fd_ivopts_data = data;
3711   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3712
3713   /* Choose the better approach.  */
3714   if (elim_cost < express_cost)
3715     {
3716       cost = elim_cost;
3717       depends_on = depends_on_elim;
3718       depends_on_elim = NULL;
3719     }
3720   else
3721     {
3722       cost = express_cost;
3723       depends_on = depends_on_express;
3724       depends_on_express = NULL;
3725       bound = NULL_TREE;
3726     }
3727
3728   set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3729
3730   if (depends_on_elim)
3731     BITMAP_FREE (depends_on_elim);
3732   if (depends_on_express)
3733     BITMAP_FREE (depends_on_express);
3734
3735   return cost != INFTY;
3736 }
3737
3738 /* Determines cost of basing replacement of USE on CAND.  Returns false
3739    if USE cannot be based on CAND.  */
3740
3741 static bool
3742 determine_use_iv_cost (struct ivopts_data *data,
3743                        struct iv_use *use, struct iv_cand *cand)
3744 {
3745   switch (use->type)
3746     {
3747     case USE_NONLINEAR_EXPR:
3748       return determine_use_iv_cost_generic (data, use, cand);
3749
3750     case USE_ADDRESS:
3751       return determine_use_iv_cost_address (data, use, cand);
3752
3753     case USE_COMPARE:
3754       return determine_use_iv_cost_condition (data, use, cand);
3755
3756     default:
3757       gcc_unreachable ();
3758     }
3759 }
3760
3761 /* Determines costs of basing the use of the iv on an iv candidate.  */
3762
3763 static void
3764 determine_use_iv_costs (struct ivopts_data *data)
3765 {
3766   unsigned i, j;
3767   struct iv_use *use;
3768   struct iv_cand *cand;
3769   bitmap to_clear = BITMAP_ALLOC (NULL);
3770
3771   alloc_use_cost_map (data);
3772
3773   for (i = 0; i < n_iv_uses (data); i++)
3774     {
3775       use = iv_use (data, i);
3776
3777       if (data->consider_all_candidates)
3778         {
3779           for (j = 0; j < n_iv_cands (data); j++)
3780             {
3781               cand = iv_cand (data, j);
3782               determine_use_iv_cost (data, use, cand);
3783             }
3784         }
3785       else
3786         {
3787           bitmap_iterator bi;
3788
3789           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3790             {
3791               cand = iv_cand (data, j);
3792               if (!determine_use_iv_cost (data, use, cand))
3793                 bitmap_set_bit (to_clear, j);
3794             }
3795
3796           /* Remove the candidates for that the cost is infinite from
3797              the list of related candidates.  */
3798           bitmap_and_compl_into (use->related_cands, to_clear);
3799           bitmap_clear (to_clear);
3800         }
3801     }
3802
3803   BITMAP_FREE (to_clear);
3804
3805   if (dump_file && (dump_flags & TDF_DETAILS))
3806     {
3807       fprintf (dump_file, "Use-candidate costs:\n");
3808
3809       for (i = 0; i < n_iv_uses (data); i++)
3810         {
3811           use = iv_use (data, i);
3812
3813           fprintf (dump_file, "Use %d:\n", i);
3814           fprintf (dump_file, "  cand\tcost\tdepends on\n");
3815           for (j = 0; j < use->n_map_members; j++)
3816             {
3817               if (!use->cost_map[j].cand
3818                   || use->cost_map[j].cost == INFTY)
3819                 continue;
3820
3821               fprintf (dump_file, "  %d\t%d\t",
3822                        use->cost_map[j].cand->id,
3823                        use->cost_map[j].cost);
3824               if (use->cost_map[j].depends_on)
3825                 bitmap_print (dump_file,
3826                               use->cost_map[j].depends_on, "","");
3827               fprintf (dump_file, "\n");
3828             }
3829
3830           fprintf (dump_file, "\n");
3831         }
3832       fprintf (dump_file, "\n");
3833     }
3834 }
3835
3836 /* Determines cost of the candidate CAND.  */
3837
3838 static void
3839 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3840 {
3841   unsigned cost_base, cost_step;
3842   tree base;
3843
3844   if (!cand->iv)
3845     {
3846       cand->cost = 0;
3847       return;
3848     }
3849
3850   /* There are two costs associated with the candidate -- its increment
3851      and its initialization.  The second is almost negligible for any loop
3852      that rolls enough, so we take it just very little into account.  */
3853
3854   base = cand->iv->base;
3855   cost_base = force_var_cost (data, base, NULL);
3856   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3857
3858   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3859
3860   /* Prefer the original iv unless we may gain something by replacing it;
3861      this is not really relevant for artificial ivs created by other
3862      passes.  */
3863   if (cand->pos == IP_ORIGINAL
3864       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3865     cand->cost--;
3866
3867   /* Prefer not to insert statements into latch unless there are some
3868      already (so that we do not create unnecessary jumps).  */
3869   if (cand->pos == IP_END
3870       && empty_block_p (ip_end_pos (data->current_loop)))
3871     cand->cost++;
3872 }
3873
3874 /* Determines costs of computation of the candidates.  */
3875
3876 static void
3877 determine_iv_costs (struct ivopts_data *data)
3878 {
3879   unsigned i;
3880
3881   if (dump_file && (dump_flags & TDF_DETAILS))
3882     {
3883       fprintf (dump_file, "Candidate costs:\n");
3884       fprintf (dump_file, "  cand\tcost\n");
3885     }
3886
3887   for (i = 0; i < n_iv_cands (data); i++)
3888     {
3889       struct iv_cand *cand = iv_cand (data, i);
3890
3891       determine_iv_cost (data, cand);
3892
3893       if (dump_file && (dump_flags & TDF_DETAILS))
3894         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
3895     }
3896
3897   if (dump_file && (dump_flags & TDF_DETAILS))
3898     fprintf (dump_file, "\n");
3899 }
3900
3901 /* Calculates cost for having SIZE induction variables.  */
3902
3903 static unsigned
3904 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3905 {
3906   /* We add size to the cost, so that we prefer eliminating ivs
3907      if possible.  */
3908   return size + estimate_reg_pressure_cost (size, data->regs_used);
3909 }
3910
3911 /* For each size of the induction variable set determine the penalty.  */
3912
3913 static void
3914 determine_set_costs (struct ivopts_data *data)
3915 {
3916   unsigned j, n;
3917   tree phi, op;
3918   struct loop *loop = data->current_loop;
3919   bitmap_iterator bi;
3920
3921   /* We use the following model (definitely improvable, especially the
3922      cost function -- TODO):
3923
3924      We estimate the number of registers available (using MD data), name it A.
3925
3926      We estimate the number of registers used by the loop, name it U.  This
3927      number is obtained as the number of loop phi nodes (not counting virtual
3928      registers and bivs) + the number of variables from outside of the loop.
3929
3930      We set a reserve R (free regs that are used for temporary computations,
3931      etc.).  For now the reserve is a constant 3.
3932
3933      Let I be the number of induction variables.
3934
3935      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3936         make a lot of ivs without a reason).
3937      -- if A - R < U + I <= A, the cost is I * PRES_COST
3938      -- if U + I > A, the cost is I * PRES_COST and
3939         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
3940
3941   if (dump_file && (dump_flags & TDF_DETAILS))
3942     {
3943       fprintf (dump_file, "Global costs:\n");
3944       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
3945       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost);
3946       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
3947     }
3948
3949   n = 0;
3950   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3951     {
3952       op = PHI_RESULT (phi);
3953
3954       if (!is_gimple_reg (op))
3955         continue;
3956
3957       if (get_iv (data, op))
3958         continue;
3959
3960       n++;
3961     }
3962
3963   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3964     {
3965       struct version_info *info = ver_info (data, j);
3966
3967       if (info->inv_id && info->has_nonlin_use)
3968         n++;
3969     }
3970
3971   data->regs_used = n;
3972   if (dump_file && (dump_flags & TDF_DETAILS))
3973     fprintf (dump_file, "  regs_used %d\n", n);
3974
3975   if (dump_file && (dump_flags & TDF_DETAILS))
3976     {
3977       fprintf (dump_file, "  cost for size:\n");
3978       fprintf (dump_file, "  ivs\tcost\n");
3979       for (j = 0; j <= 2 * target_avail_regs; j++)
3980         fprintf (dump_file, "  %d\t%d\n", j,
3981                  ivopts_global_cost_for_size (data, j));
3982       fprintf (dump_file, "\n");
3983     }
3984 }
3985
3986 /* Returns true if A is a cheaper cost pair than B.  */
3987
3988 static bool
3989 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3990 {
3991   if (!a)
3992     return false;
3993
3994   if (!b)
3995     return true;
3996
3997   if (a->cost < b->cost)
3998     return true;
3999
4000   if (a->cost > b->cost)
4001     return false;
4002
4003   /* In case the costs are the same, prefer the cheaper candidate.  */
4004   if (a->cand->cost < b->cand->cost)
4005     return true;
4006
4007   return false;
4008 }
4009
4010 /* Computes the cost field of IVS structure.  */
4011
4012 static void
4013 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4014 {
4015   unsigned cost = 0;
4016
4017   cost += ivs->cand_use_cost;
4018   cost += ivs->cand_cost;
4019   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4020
4021   ivs->cost = cost;
4022 }
4023
4024 /* Remove invariants in set INVS to set IVS.  */
4025
4026 static void
4027 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4028 {
4029   bitmap_iterator bi;
4030   unsigned iid;
4031
4032   if (!invs)
4033     return;
4034
4035   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4036     {
4037       ivs->n_invariant_uses[iid]--;
4038       if (ivs->n_invariant_uses[iid] == 0)
4039         ivs->n_regs--;
4040     }
4041 }
4042
4043 /* Set USE not to be expressed by any candidate in IVS.  */
4044
4045 static void
4046 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4047                  struct iv_use *use)
4048 {
4049   unsigned uid = use->id, cid;
4050   struct cost_pair *cp;
4051
4052   cp = ivs->cand_for_use[uid];
4053   if (!cp)
4054     return;
4055   cid = cp->cand->id;
4056
4057   ivs->bad_uses++;
4058   ivs->cand_for_use[uid] = NULL;
4059   ivs->n_cand_uses[cid]--;
4060
4061   if (ivs->n_cand_uses[cid] == 0)
4062     {
4063       bitmap_clear_bit (ivs->cands, cid);
4064       /* Do not count the pseudocandidates.  */
4065       if (cp->cand->iv)
4066         ivs->n_regs--;
4067       ivs->n_cands--;
4068       ivs->cand_cost -= cp->cand->cost;
4069
4070       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4071     }
4072
4073   ivs->cand_use_cost -= cp->cost;
4074
4075   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4076   iv_ca_recount_cost (data, ivs);
4077 }
4078
4079 /* Add invariants in set INVS to set IVS.  */
4080
4081 static void
4082 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4083 {
4084   bitmap_iterator bi;
4085   unsigned iid;
4086
4087   if (!invs)
4088     return;
4089
4090   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4091     {
4092       ivs->n_invariant_uses[iid]++;
4093       if (ivs->n_invariant_uses[iid] == 1)
4094         ivs->n_regs++;
4095     }
4096 }
4097
4098 /* Set cost pair for USE in set IVS to CP.  */
4099
4100 static void
4101 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4102               struct iv_use *use, struct cost_pair *cp)
4103 {
4104   unsigned uid = use->id, cid;
4105
4106   if (ivs->cand_for_use[uid] == cp)
4107     return;
4108
4109   if (ivs->cand_for_use[uid])
4110     iv_ca_set_no_cp (data, ivs, use);
4111
4112   if (cp)
4113     {
4114       cid = cp->cand->id;
4115
4116       ivs->bad_uses--;
4117       ivs->cand_for_use[uid] = cp;
4118       ivs->n_cand_uses[cid]++;
4119       if (ivs->n_cand_uses[cid] == 1)
4120         {
4121           bitmap_set_bit (ivs->cands, cid);
4122           /* Do not count the pseudocandidates.  */
4123           if (cp->cand->iv)
4124             ivs->n_regs++;
4125           ivs->n_cands++;
4126           ivs->cand_cost += cp->cand->cost;
4127
4128           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4129         }
4130
4131       ivs->cand_use_cost += cp->cost;
4132       iv_ca_set_add_invariants (ivs, cp->depends_on);
4133       iv_ca_recount_cost (data, ivs);
4134     }
4135 }
4136
4137 /* Extend set IVS by expressing USE by some of the candidates in it
4138    if possible.  */
4139
4140 static void
4141 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4142                struct iv_use *use)
4143 {
4144   struct cost_pair *best_cp = NULL, *cp;
4145   bitmap_iterator bi;
4146   unsigned i;
4147
4148   gcc_assert (ivs->upto >= use->id);
4149
4150   if (ivs->upto == use->id)
4151     {
4152       ivs->upto++;
4153       ivs->bad_uses++;
4154     }
4155
4156   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4157     {
4158       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4159
4160       if (cheaper_cost_pair (cp, best_cp))
4161         best_cp = cp;
4162     }
4163
4164   iv_ca_set_cp (data, ivs, use, best_cp);
4165 }
4166
4167 /* Get cost for assignment IVS.  */
4168
4169 static unsigned
4170 iv_ca_cost (struct iv_ca *ivs)
4171 {
4172   return (ivs->bad_uses ? INFTY : ivs->cost);
4173 }
4174
4175 /* Returns true if all dependences of CP are among invariants in IVS.  */
4176
4177 static bool
4178 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4179 {
4180   unsigned i;
4181   bitmap_iterator bi;
4182
4183   if (!cp->depends_on)
4184     return true;
4185
4186   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4187     {
4188       if (ivs->n_invariant_uses[i] == 0)
4189         return false;
4190     }
4191
4192   return true;
4193 }
4194
4195 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4196    it before NEXT_CHANGE.  */
4197
4198 static struct iv_ca_delta *
4199 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4200                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4201 {
4202   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4203
4204   change->use = use;
4205   change->old_cp = old_cp;
4206   change->new_cp = new_cp;
4207   change->next_change = next_change;
4208
4209   return change;
4210 }
4211
4212 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4213    are rewritten.  */
4214
4215 static struct iv_ca_delta *
4216 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4217 {
4218   struct iv_ca_delta *last;
4219
4220   if (!l2)
4221     return l1;
4222
4223   if (!l1)
4224     return l2;
4225
4226   for (last = l1; last->next_change; last = last->next_change)
4227     continue;
4228   last->next_change = l2;
4229
4230   return l1;
4231 }
4232
4233 /* Returns candidate by that USE is expressed in IVS.  */
4234
4235 static struct cost_pair *
4236 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4237 {
4238   return ivs->cand_for_use[use->id];
4239 }
4240
4241 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4242
4243 static struct iv_ca_delta *
4244 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4245 {
4246   struct iv_ca_delta *act, *next, *prev = NULL;
4247   struct cost_pair *tmp;
4248
4249   for (act = delta; act; act = next)
4250     {
4251       next = act->next_change;
4252       act->next_change = prev;
4253       prev = act;
4254
4255       tmp = act->old_cp;
4256       act->old_cp = act->new_cp;
4257       act->new_cp = tmp;
4258     }
4259
4260   return prev;
4261 }
4262
4263 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4264    reverted instead.  */
4265
4266 static void
4267 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4268                     struct iv_ca_delta *delta, bool forward)
4269 {
4270   struct cost_pair *from, *to;
4271   struct iv_ca_delta *act;
4272
4273   if (!forward)
4274     delta = iv_ca_delta_reverse (delta);
4275
4276   for (act = delta; act; act = act->next_change)
4277     {
4278       from = act->old_cp;
4279       to = act->new_cp;
4280       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4281       iv_ca_set_cp (data, ivs, act->use, to);
4282     }
4283
4284   if (!forward)
4285     iv_ca_delta_reverse (delta);
4286 }
4287
4288 /* Returns true if CAND is used in IVS.  */
4289
4290 static bool
4291 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4292 {
4293   return ivs->n_cand_uses[cand->id] > 0;
4294 }
4295
4296 /* Returns number of induction variable candidates in the set IVS.  */
4297
4298 static unsigned
4299 iv_ca_n_cands (struct iv_ca *ivs)
4300 {
4301   return ivs->n_cands;
4302 }
4303
4304 /* Free the list of changes DELTA.  */
4305
4306 static void
4307 iv_ca_delta_free (struct iv_ca_delta **delta)
4308 {
4309   struct iv_ca_delta *act, *next;
4310
4311   for (act = *delta; act; act = next)
4312     {
4313       next = act->next_change;
4314       free (act);
4315     }
4316
4317   *delta = NULL;
4318 }
4319
4320 /* Allocates new iv candidates assignment.  */
4321
4322 static struct iv_ca *
4323 iv_ca_new (struct ivopts_data *data)
4324 {
4325   struct iv_ca *nw = XNEW (struct iv_ca);
4326
4327   nw->upto = 0;
4328   nw->bad_uses = 0;
4329   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4330   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4331   nw->cands = BITMAP_ALLOC (NULL);
4332   nw->n_cands = 0;
4333   nw->n_regs = 0;
4334   nw->cand_use_cost = 0;
4335   nw->cand_cost = 0;
4336   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4337   nw->cost = 0;
4338
4339   return nw;
4340 }
4341
4342 /* Free memory occupied by the set IVS.  */
4343
4344 static void
4345 iv_ca_free (struct iv_ca **ivs)
4346 {
4347   free ((*ivs)->cand_for_use);
4348   free ((*ivs)->n_cand_uses);
4349   BITMAP_FREE ((*ivs)->cands);
4350   free ((*ivs)->n_invariant_uses);
4351   free (*ivs);
4352   *ivs = NULL;
4353 }
4354
4355 /* Dumps IVS to FILE.  */
4356
4357 static void
4358 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4359 {
4360   const char *pref = "  invariants ";
4361   unsigned i;
4362
4363   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4364   bitmap_print (file, ivs->cands, "  candidates ","\n");
4365
4366   for (i = 1; i <= data->max_inv_id; i++)
4367     if (ivs->n_invariant_uses[i])
4368       {
4369         fprintf (file, "%s%d", pref, i);
4370         pref = ", ";
4371       }
4372   fprintf (file, "\n");
4373 }
4374
4375 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4376    new set, and store differences in DELTA.  Number of induction variables
4377    in the new set is stored to N_IVS.  */
4378
4379 static unsigned
4380 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4381               struct iv_cand *cand, struct iv_ca_delta **delta,
4382               unsigned *n_ivs)
4383 {
4384   unsigned i, cost;
4385   struct iv_use *use;
4386   struct cost_pair *old_cp, *new_cp;
4387
4388   *delta = NULL;
4389   for (i = 0; i < ivs->upto; i++)
4390     {
4391       use = iv_use (data, i);
4392       old_cp = iv_ca_cand_for_use (ivs, use);
4393
4394       if (old_cp
4395           && old_cp->cand == cand)
4396         continue;
4397
4398       new_cp = get_use_iv_cost (data, use, cand);
4399       if (!new_cp)
4400         continue;
4401
4402       if (!iv_ca_has_deps (ivs, new_cp))
4403         continue;
4404
4405       if (!cheaper_cost_pair (new_cp, old_cp))
4406         continue;
4407
4408       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4409     }
4410
4411   iv_ca_delta_commit (data, ivs, *delta, true);
4412   cost = iv_ca_cost (ivs);
4413   if (n_ivs)
4414     *n_ivs = iv_ca_n_cands (ivs);
4415   iv_ca_delta_commit (data, ivs, *delta, false);
4416
4417   return cost;
4418 }
4419
4420 /* Try narrowing set IVS by removing CAND.  Return the cost of
4421    the new set and store the differences in DELTA.  */
4422
4423 static unsigned
4424 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4425               struct iv_cand *cand, struct iv_ca_delta **delta)
4426 {
4427   unsigned i, ci;
4428   struct iv_use *use;
4429   struct cost_pair *old_cp, *new_cp, *cp;
4430   bitmap_iterator bi;
4431   struct iv_cand *cnd;
4432   unsigned cost;
4433
4434   *delta = NULL;
4435   for (i = 0; i < n_iv_uses (data); i++)
4436     {
4437       use = iv_use (data, i);
4438
4439       old_cp = iv_ca_cand_for_use (ivs, use);
4440       if (old_cp->cand != cand)
4441         continue;
4442
4443       new_cp = NULL;
4444
4445       if (data->consider_all_candidates)
4446         {
4447           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4448             {
4449               if (ci == cand->id)
4450                 continue;
4451
4452               cnd = iv_cand (data, ci);
4453
4454               cp = get_use_iv_cost (data, use, cnd);
4455               if (!cp)
4456                 continue;
4457               if (!iv_ca_has_deps (ivs, cp))
4458                 continue;
4459
4460               if (!cheaper_cost_pair (cp, new_cp))
4461                 continue;
4462
4463               new_cp = cp;
4464             }
4465         }
4466       else
4467         {
4468           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4469             {
4470               if (ci == cand->id)
4471                 continue;
4472
4473               cnd = iv_cand (data, ci);
4474
4475               cp = get_use_iv_cost (data, use, cnd);
4476               if (!cp)
4477                 continue;
4478               if (!iv_ca_has_deps (ivs, cp))
4479                 continue;
4480
4481               if (!cheaper_cost_pair (cp, new_cp))
4482                 continue;
4483
4484               new_cp = cp;
4485             }
4486         }
4487
4488       if (!new_cp)
4489         {
4490           iv_ca_delta_free (delta);
4491           return INFTY;
4492         }
4493
4494       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4495     }
4496
4497   iv_ca_delta_commit (data, ivs, *delta, true);
4498   cost = iv_ca_cost (ivs);
4499   iv_ca_delta_commit (data, ivs, *delta, false);
4500
4501   return cost;
4502 }
4503
4504 /* Try optimizing the set of candidates IVS by removing candidates different
4505    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4506    differences in DELTA.  */
4507
4508 static unsigned
4509 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4510              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4511 {
4512   bitmap_iterator bi;
4513   struct iv_ca_delta *act_delta, *best_delta;
4514   unsigned i, best_cost, acost;
4515   struct iv_cand *cand;
4516
4517   best_delta = NULL;
4518   best_cost = iv_ca_cost (ivs);
4519
4520   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4521     {
4522       cand = iv_cand (data, i);
4523
4524       if (cand == except_cand)
4525         continue;
4526
4527       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4528
4529       if (acost < best_cost)
4530         {
4531           best_cost = acost;
4532           iv_ca_delta_free (&best_delta);
4533           best_delta = act_delta;
4534         }
4535       else
4536         iv_ca_delta_free (&act_delta);
4537     }
4538
4539   if (!best_delta)
4540     {
4541       *delta = NULL;
4542       return best_cost;
4543     }
4544
4545   /* Recurse to possibly remove other unnecessary ivs.  */
4546   iv_ca_delta_commit (data, ivs, best_delta, true);
4547   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4548   iv_ca_delta_commit (data, ivs, best_delta, false);
4549   *delta = iv_ca_delta_join (best_delta, *delta);
4550   return best_cost;
4551 }
4552
4553 /* Tries to extend the sets IVS in the best possible way in order
4554    to express the USE.  */
4555
4556 static bool
4557 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4558                   struct iv_use *use)
4559 {
4560   unsigned best_cost, act_cost;
4561   unsigned i;
4562   bitmap_iterator bi;
4563   struct iv_cand *cand;
4564   struct iv_ca_delta *best_delta = NULL, *act_delta;
4565   struct cost_pair *cp;
4566
4567   iv_ca_add_use (data, ivs, use);
4568   best_cost = iv_ca_cost (ivs);
4569
4570   cp = iv_ca_cand_for_use (ivs, use);
4571   if (cp)
4572     {
4573       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4574       iv_ca_set_no_cp (data, ivs, use);
4575     }
4576
4577   /* First try important candidates.  Only if it fails, try the specific ones.
4578      Rationale -- in loops with many variables the best choice often is to use
4579      just one generic biv.  If we added here many ivs specific to the uses,
4580      the optimization algorithm later would be likely to get stuck in a local
4581      minimum, thus causing us to create too many ivs.  The approach from
4582      few ivs to more seems more likely to be successful -- starting from few
4583      ivs, replacing an expensive use by a specific iv should always be a
4584      win.  */
4585   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4586     {
4587       cand = iv_cand (data, i);
4588
4589       if (iv_ca_cand_used_p (ivs, cand))
4590         continue;
4591
4592       cp = get_use_iv_cost (data, use, cand);
4593       if (!cp)
4594         continue;
4595
4596       iv_ca_set_cp (data, ivs, use, cp);
4597       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4598       iv_ca_set_no_cp (data, ivs, use);
4599       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4600
4601       if (act_cost < best_cost)
4602         {
4603           best_cost = act_cost;
4604
4605           iv_ca_delta_free (&best_delta);
4606           best_delta = act_delta;
4607         }
4608       else
4609         iv_ca_delta_free (&act_delta);
4610     }
4611
4612   if (best_cost == INFTY)
4613     {
4614       for (i = 0; i < use->n_map_members; i++)
4615         {
4616           cp = use->cost_map + i;
4617           cand = cp->cand;
4618           if (!cand)
4619             continue;
4620
4621           /* Already tried this.  */
4622           if (cand->important)
4623             continue;
4624
4625           if (iv_ca_cand_used_p (ivs, cand))
4626             continue;
4627
4628           act_delta = NULL;
4629           iv_ca_set_cp (data, ivs, use, cp);
4630           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4631           iv_ca_set_no_cp (data, ivs, use);
4632           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4633                                        cp, act_delta);
4634
4635           if (act_cost < best_cost)
4636             {
4637               best_cost = act_cost;
4638
4639               if (best_delta)
4640                 iv_ca_delta_free (&best_delta);
4641               best_delta = act_delta;
4642             }
4643           else
4644             iv_ca_delta_free (&act_delta);
4645         }
4646     }
4647
4648   iv_ca_delta_commit (data, ivs, best_delta, true);
4649   iv_ca_delta_free (&best_delta);
4650
4651   return (best_cost != INFTY);
4652 }
4653
4654 /* Finds an initial assignment of candidates to uses.  */
4655
4656 static struct iv_ca *
4657 get_initial_solution (struct ivopts_data *data)
4658 {
4659   struct iv_ca *ivs = iv_ca_new (data);
4660   unsigned i;
4661
4662   for (i = 0; i < n_iv_uses (data); i++)
4663     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4664       {
4665         iv_ca_free (&ivs);
4666         return NULL;
4667       }
4668
4669   return ivs;
4670 }
4671
4672 /* Tries to improve set of induction variables IVS.  */
4673
4674 static bool
4675 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4676 {
4677   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4678   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4679   struct iv_cand *cand;
4680
4681   /* Try extending the set of induction variables by one.  */
4682   for (i = 0; i < n_iv_cands (data); i++)
4683     {
4684       cand = iv_cand (data, i);
4685
4686       if (iv_ca_cand_used_p (ivs, cand))
4687         continue;
4688
4689       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4690       if (!act_delta)
4691         continue;
4692
4693       /* If we successfully added the candidate and the set is small enough,
4694          try optimizing it by removing other candidates.  */
4695       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4696         {
4697           iv_ca_delta_commit (data, ivs, act_delta, true);
4698           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4699           iv_ca_delta_commit (data, ivs, act_delta, false);
4700           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4701         }
4702
4703       if (acost < best_cost)
4704         {
4705           best_cost = acost;
4706           iv_ca_delta_free (&best_delta);
4707           best_delta = act_delta;
4708         }
4709       else
4710         iv_ca_delta_free (&act_delta);
4711     }
4712
4713   if (!best_delta)
4714     {
4715       /* Try removing the candidates from the set instead.  */
4716       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4717
4718       /* Nothing more we can do.  */
4719       if (!best_delta)
4720         return false;
4721     }
4722
4723   iv_ca_delta_commit (data, ivs, best_delta, true);
4724   gcc_assert (best_cost == iv_ca_cost (ivs));
4725   iv_ca_delta_free (&best_delta);
4726   return true;
4727 }
4728
4729 /* Attempts to find the optimal set of induction variables.  We do simple
4730    greedy heuristic -- we try to replace at most one candidate in the selected
4731    solution and remove the unused ivs while this improves the cost.  */
4732
4733 static struct iv_ca *
4734 find_optimal_iv_set (struct ivopts_data *data)
4735 {
4736   unsigned i;
4737   struct iv_ca *set;
4738   struct iv_use *use;
4739
4740   /* Get the initial solution.  */
4741   set = get_initial_solution (data);
4742   if (!set)
4743     {
4744       if (dump_file && (dump_flags & TDF_DETAILS))
4745         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4746       return NULL;
4747     }
4748
4749   if (dump_file && (dump_flags & TDF_DETAILS))
4750     {
4751       fprintf (dump_file, "Initial set of candidates:\n");
4752       iv_ca_dump (data, dump_file, set);
4753     }
4754
4755   while (try_improve_iv_set (data, set))
4756     {
4757       if (dump_file && (dump_flags & TDF_DETAILS))
4758         {
4759           fprintf (dump_file, "Improved to:\n");
4760           iv_ca_dump (data, dump_file, set);
4761         }
4762     }
4763
4764   if (dump_file && (dump_flags & TDF_DETAILS))
4765     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4766
4767   for (i = 0; i < n_iv_uses (data); i++)
4768     {
4769       use = iv_use (data, i);
4770       use->selected = iv_ca_cand_for_use (set, use)->cand;
4771     }
4772
4773   return set;
4774 }
4775
4776 /* Creates a new induction variable corresponding to CAND.  */
4777
4778 static void
4779 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4780 {
4781   block_stmt_iterator incr_pos;
4782   tree base;
4783   bool after = false;
4784
4785   if (!cand->iv)
4786     return;
4787
4788   switch (cand->pos)
4789     {
4790     case IP_NORMAL:
4791       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4792       break;
4793
4794     case IP_END:
4795       incr_pos = bsi_last (ip_end_pos (data->current_loop));
4796       after = true;
4797       break;
4798
4799     case IP_ORIGINAL:
4800       /* Mark that the iv is preserved.  */
4801       name_info (data, cand->var_before)->preserve_biv = true;
4802       name_info (data, cand->var_after)->preserve_biv = true;
4803
4804       /* Rewrite the increment so that it uses var_before directly.  */
4805       find_interesting_uses_op (data, cand->var_after)->selected = cand;
4806
4807       return;
4808     }
4809
4810   gimple_add_tmp_var (cand->var_before);
4811   add_referenced_var (cand->var_before);
4812
4813   base = unshare_expr (cand->iv->base);
4814
4815   create_iv (base, unshare_expr (cand->iv->step),
4816              cand->var_before, data->current_loop,
4817              &incr_pos, after, &cand->var_before, &cand->var_after);
4818 }
4819
4820 /* Creates new induction variables described in SET.  */
4821
4822 static void
4823 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4824 {
4825   unsigned i;
4826   struct iv_cand *cand;
4827   bitmap_iterator bi;
4828
4829   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4830     {
4831       cand = iv_cand (data, i);
4832       create_new_iv (data, cand);
4833     }
4834 }
4835
4836 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
4837    is true, remove also the ssa name defined by the statement.  */
4838
4839 static void
4840 remove_statement (tree stmt, bool including_defined_name)
4841 {
4842   if (TREE_CODE (stmt) == PHI_NODE)
4843     {
4844       remove_phi_node (stmt, NULL_TREE, including_defined_name);
4845     }
4846   else
4847     {
4848       block_stmt_iterator bsi = bsi_for_stmt (stmt);
4849
4850       bsi_remove (&bsi, true);
4851       release_defs (stmt);
4852     }
4853 }
4854
4855 /* Rewrites USE (definition of iv used in a nonlinear expression)
4856    using candidate CAND.  */
4857
4858 static void
4859 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4860                             struct iv_use *use, struct iv_cand *cand)
4861 {
4862   tree comp;
4863   tree op, tgt, ass;
4864   block_stmt_iterator bsi;
4865
4866   /* An important special case -- if we are asked to express value of
4867      the original iv by itself, just exit; there is no need to
4868      introduce a new computation (that might also need casting the
4869      variable to unsigned and back).  */
4870   if (cand->pos == IP_ORIGINAL
4871       && cand->incremented_at == use->stmt)
4872     {
4873       tree step, ctype, utype;
4874       enum tree_code incr_code = PLUS_EXPR;
4875
4876       gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
4877       gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
4878
4879       step = cand->iv->step;
4880       ctype = TREE_TYPE (step);
4881       utype = TREE_TYPE (cand->var_after);
4882       if (TREE_CODE (step) == NEGATE_EXPR)
4883         {
4884           incr_code = MINUS_EXPR;
4885           step = TREE_OPERAND (step, 0);
4886         }
4887
4888       /* Check whether we may leave the computation unchanged.
4889          This is the case only if it does not rely on other
4890          computations in the loop -- otherwise, the computation
4891          we rely upon may be removed in remove_unused_ivs,
4892          thus leading to ICE.  */
4893       op = GIMPLE_STMT_OPERAND (use->stmt, 1);
4894       if (TREE_CODE (op) == PLUS_EXPR
4895           || TREE_CODE (op) == MINUS_EXPR
4896           || TREE_CODE (op) == POINTER_PLUS_EXPR)
4897         {
4898           if (TREE_OPERAND (op, 0) == cand->var_before)
4899             op = TREE_OPERAND (op, 1);
4900           else if (TREE_CODE (op) != MINUS_EXPR
4901                    && TREE_OPERAND (op, 1) == cand->var_before)
4902             op = TREE_OPERAND (op, 0);
4903           else
4904             op = NULL_TREE;
4905         }
4906       else
4907         op = NULL_TREE;
4908
4909       if (op
4910           && (TREE_CODE (op) == INTEGER_CST
4911               || operand_equal_p (op, step, 0)))
4912         return;
4913
4914       /* Otherwise, add the necessary computations to express
4915          the iv.  */
4916       op = fold_convert (ctype, cand->var_before);
4917       comp = fold_convert (utype,
4918                            build2 (incr_code, ctype, op,
4919                                    unshare_expr (step)));
4920     }
4921   else
4922     {
4923       comp = get_computation (data->current_loop, use, cand);
4924       gcc_assert (comp != NULL_TREE);
4925     }
4926
4927   switch (TREE_CODE (use->stmt))
4928     {
4929     case PHI_NODE:
4930       tgt = PHI_RESULT (use->stmt);
4931
4932       /* If we should keep the biv, do not replace it.  */
4933       if (name_info (data, tgt)->preserve_biv)
4934         return;
4935
4936       bsi = bsi_after_labels (bb_for_stmt (use->stmt));
4937       break;
4938
4939     case GIMPLE_MODIFY_STMT:
4940       tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
4941       bsi = bsi_for_stmt (use->stmt);
4942       break;
4943
4944     default:
4945       gcc_unreachable ();
4946     }
4947
4948   op = force_gimple_operand_bsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
4949                                  true, BSI_SAME_STMT);
4950
4951   if (TREE_CODE (use->stmt) == PHI_NODE)
4952     {
4953       ass = build_gimple_modify_stmt (tgt, op);
4954       bsi_insert_before (&bsi, ass, BSI_SAME_STMT);
4955       remove_statement (use->stmt, false);
4956       SSA_NAME_DEF_STMT (tgt) = ass;
4957     }
4958   else
4959     GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
4960 }
4961
4962 /* Replaces ssa name in index IDX by its basic variable.  Callback for
4963    for_each_index.  */
4964
4965 static bool
4966 idx_remove_ssa_names (tree base, tree *idx,
4967                       void *data ATTRIBUTE_UNUSED)
4968 {
4969   tree *op;
4970
4971   if (TREE_CODE (*idx) == SSA_NAME)
4972     *idx = SSA_NAME_VAR (*idx);
4973
4974   if (TREE_CODE (base) == ARRAY_REF)
4975     {
4976       op = &TREE_OPERAND (base, 2);
4977       if (*op
4978           && TREE_CODE (*op) == SSA_NAME)
4979         *op = SSA_NAME_VAR (*op);
4980       op = &TREE_OPERAND (base, 3);
4981       if (*op
4982           && TREE_CODE (*op) == SSA_NAME)
4983         *op = SSA_NAME_VAR (*op);
4984     }
4985
4986   return true;
4987 }
4988
4989 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
4990
4991 static tree
4992 unshare_and_remove_ssa_names (tree ref)
4993 {
4994   ref = unshare_expr (ref);
4995   for_each_index (&ref, idx_remove_ssa_names, NULL);
4996
4997   return ref;
4998 }
4999
5000 /* Extract the alias analysis info for the memory reference REF.  There are
5001    several ways how this information may be stored and what precisely is
5002    its semantics depending on the type of the reference, but there always is
5003    somewhere hidden one _DECL node that is used to determine the set of
5004    virtual operands for the reference.  The code below deciphers this jungle
5005    and extracts this single useful piece of information.  */
5006
5007 static tree
5008 get_ref_tag (tree ref, tree orig)
5009 {
5010   tree var = get_base_address (ref);
5011   tree aref = NULL_TREE, tag, sv;
5012   HOST_WIDE_INT offset, size, maxsize;
5013
5014   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5015     {
5016       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5017       if (ref)
5018         break;
5019     }
5020
5021   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5022     return aref;
5023
5024   if (!var)
5025     return NULL_TREE;
5026
5027   if (TREE_CODE (var) == INDIRECT_REF)
5028     {
5029       /* If the base is a dereference of a pointer, first check its name memory
5030          tag.  If it does not have one, use its symbol memory tag.  */
5031       var = TREE_OPERAND (var, 0);
5032       if (TREE_CODE (var) != SSA_NAME)
5033         return NULL_TREE;
5034
5035       if (SSA_NAME_PTR_INFO (var))
5036         {
5037           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5038           if (tag)
5039             return tag;
5040         }
5041
5042       var = SSA_NAME_VAR (var);
5043       tag = symbol_mem_tag (var);
5044       gcc_assert (tag != NULL_TREE);
5045       return tag;
5046     }
5047   else
5048     {
5049       if (!DECL_P (var))
5050         return NULL_TREE;
5051
5052       tag = symbol_mem_tag (var);
5053       if (tag)
5054         return tag;
5055
5056       return var;
5057     }
5058 }
5059
5060 /* Copies the reference information from OLD_REF to NEW_REF.  */
5061
5062 static void
5063 copy_ref_info (tree new_ref, tree old_ref)
5064 {
5065   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5066     copy_mem_ref_info (new_ref, old_ref);
5067   else
5068     {
5069       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5070       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5071     }
5072 }
5073
5074 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5075
5076 static void
5077 rewrite_use_address (struct ivopts_data *data,
5078                      struct iv_use *use, struct iv_cand *cand)
5079 {
5080   aff_tree aff;
5081   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5082   tree ref;
5083   bool ok;
5084
5085   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5086   gcc_assert (ok);
5087   unshare_aff_combination (&aff);
5088
5089   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5090   copy_ref_info (ref, *use->op_p);
5091   *use->op_p = ref;
5092 }
5093
5094 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5095    candidate CAND.  */
5096
5097 static void
5098 rewrite_use_compare (struct ivopts_data *data,
5099                      struct iv_use *use, struct iv_cand *cand)
5100 {
5101   tree comp, *var_p, op, bound;
5102   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5103   enum tree_code compare;
5104   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5105   bool ok;
5106
5107   bound = cp->value;
5108   if (bound)
5109     {
5110       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5111       tree var_type = TREE_TYPE (var);
5112
5113       compare = iv_elimination_compare (data, use);
5114       bound = unshare_expr (fold_convert (var_type, bound));
5115       op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE,
5116                                      true, BSI_SAME_STMT);
5117
5118       *use->op_p = build2 (compare, boolean_type_node, var, op);
5119       return;
5120     }
5121
5122   /* The induction variable elimination failed; just express the original
5123      giv.  */
5124   comp = get_computation (data->current_loop, use, cand);
5125   gcc_assert (comp != NULL_TREE);
5126
5127   ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5128   gcc_assert (ok);
5129
5130   *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5131                                      true, BSI_SAME_STMT);
5132 }
5133
5134 /* Rewrites USE using candidate CAND.  */
5135
5136 static void
5137 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5138 {
5139   push_stmt_changes (&use->stmt);
5140
5141   switch (use->type)
5142     {
5143       case USE_NONLINEAR_EXPR:
5144         rewrite_use_nonlinear_expr (data, use, cand);
5145         break;
5146
5147       case USE_ADDRESS:
5148         rewrite_use_address (data, use, cand);
5149         break;
5150
5151       case USE_COMPARE:
5152         rewrite_use_compare (data, use, cand);
5153         break;
5154
5155       default:
5156         gcc_unreachable ();
5157     }
5158
5159   pop_stmt_changes (&use->stmt);
5160 }
5161
5162 /* Rewrite the uses using the selected induction variables.  */
5163
5164 static void
5165 rewrite_uses (struct ivopts_data *data)
5166 {
5167   unsigned i;
5168   struct iv_cand *cand;
5169   struct iv_use *use;
5170
5171   for (i = 0; i < n_iv_uses (data); i++)
5172     {
5173       use = iv_use (data, i);
5174       cand = use->selected;
5175       gcc_assert (cand);
5176
5177       rewrite_use (data, use, cand);
5178     }
5179 }
5180
5181 /* Removes the ivs that are not used after rewriting.  */
5182
5183 static void
5184 remove_unused_ivs (struct ivopts_data *data)
5185 {
5186   unsigned j;
5187   bitmap_iterator bi;
5188
5189   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5190     {
5191       struct version_info *info;
5192
5193       info = ver_info (data, j);
5194       if (info->iv
5195           && !integer_zerop (info->iv->step)
5196           && !info->inv_id
5197           && !info->iv->have_use_for
5198           && !info->preserve_biv)
5199         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5200     }
5201 }
5202
5203 /* Frees data allocated by the optimization of a single loop.  */
5204
5205 static void
5206 free_loop_data (struct ivopts_data *data)
5207 {
5208   unsigned i, j;
5209   bitmap_iterator bi;
5210   tree obj;
5211
5212   if (data->niters)
5213     {
5214       pointer_map_destroy (data->niters);
5215       data->niters = NULL;
5216     }
5217
5218   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5219     {
5220       struct version_info *info;
5221
5222       info = ver_info (data, i);
5223       if (info->iv)
5224         free (info->iv);
5225       info->iv = NULL;
5226       info->has_nonlin_use = false;
5227       info->preserve_biv = false;
5228       info->inv_id = 0;
5229     }
5230   bitmap_clear (data->relevant);
5231   bitmap_clear (data->important_candidates);
5232
5233   for (i = 0; i < n_iv_uses (data); i++)
5234     {
5235       struct iv_use *use = iv_use (data, i);
5236
5237       free (use->iv);
5238       BITMAP_FREE (use->related_cands);
5239       for (j = 0; j < use->n_map_members; j++)
5240         if (use->cost_map[j].depends_on)
5241           BITMAP_FREE (use->cost_map[j].depends_on);
5242       free (use->cost_map);
5243       free (use);
5244     }
5245   VEC_truncate (iv_use_p, data->iv_uses, 0);
5246
5247   for (i = 0; i < n_iv_cands (data); i++)
5248     {
5249       struct iv_cand *cand = iv_cand (data, i);
5250
5251       if (cand->iv)
5252         free (cand->iv);
5253       if (cand->depends_on)
5254         BITMAP_FREE (cand->depends_on);
5255       free (cand);
5256     }
5257   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5258
5259   if (data->version_info_size < num_ssa_names)
5260     {
5261       data->version_info_size = 2 * num_ssa_names;
5262       free (data->version_info);
5263       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5264     }
5265
5266   data->max_inv_id = 0;
5267
5268   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5269     SET_DECL_RTL (obj, NULL_RTX);
5270
5271   VEC_truncate (tree, decl_rtl_to_reset, 0);
5272 }
5273
5274 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5275    loop tree.  */
5276
5277 static void
5278 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5279 {
5280   free_loop_data (data);
5281   free (data->version_info);
5282   BITMAP_FREE (data->relevant);
5283   BITMAP_FREE (data->important_candidates);
5284
5285   VEC_free (tree, heap, decl_rtl_to_reset);
5286   VEC_free (iv_use_p, heap, data->iv_uses);
5287   VEC_free (iv_cand_p, heap, data->iv_candidates);
5288 }
5289
5290 /* Optimizes the LOOP.  Returns true if anything changed.  */
5291
5292 static bool
5293 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5294 {
5295   bool changed = false;
5296   struct iv_ca *iv_ca;
5297   edge exit;
5298
5299   gcc_assert (!data->niters);
5300   data->current_loop = loop;
5301
5302   if (dump_file && (dump_flags & TDF_DETAILS))
5303     {
5304       fprintf (dump_file, "Processing loop %d\n", loop->num);
5305
5306       exit = single_dom_exit (loop);
5307       if (exit)
5308         {
5309           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5310                    exit->src->index, exit->dest->index);
5311           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5312           fprintf (dump_file, "\n");
5313         }
5314
5315       fprintf (dump_file, "\n");
5316     }
5317
5318   /* For each ssa name determines whether it behaves as an induction variable
5319      in some loop.  */
5320   if (!find_induction_variables (data))
5321     goto finish;
5322
5323   /* Finds interesting uses (item 1).  */
5324   find_interesting_uses (data);
5325   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5326     goto finish;
5327
5328   /* Finds candidates for the induction variables (item 2).  */
5329   find_iv_candidates (data);
5330
5331   /* Calculates the costs (item 3, part 1).  */
5332   determine_use_iv_costs (data);
5333   determine_iv_costs (data);
5334   determine_set_costs (data);
5335
5336   /* Find the optimal set of induction variables (item 3, part 2).  */
5337   iv_ca = find_optimal_iv_set (data);
5338   if (!iv_ca)
5339     goto finish;
5340   changed = true;
5341
5342   /* Create the new induction variables (item 4, part 1).  */
5343   create_new_ivs (data, iv_ca);
5344   iv_ca_free (&iv_ca);
5345
5346   /* Rewrite the uses (item 4, part 2).  */
5347   rewrite_uses (data);
5348
5349   /* Remove the ivs that are unused after rewriting.  */
5350   remove_unused_ivs (data);
5351
5352   /* We have changed the structure of induction variables; it might happen
5353      that definitions in the scev database refer to some of them that were
5354      eliminated.  */
5355   scev_reset ();
5356
5357 finish:
5358   free_loop_data (data);
5359
5360   return changed;
5361 }
5362
5363 /* Main entry point.  Optimizes induction variables in loops.  */
5364
5365 void
5366 tree_ssa_iv_optimize (void)
5367 {
5368   struct loop *loop;
5369   struct ivopts_data data;
5370   loop_iterator li;
5371
5372   tree_ssa_iv_optimize_init (&data);
5373
5374   /* Optimize the loops starting with the innermost ones.  */
5375   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5376     {
5377       if (dump_file && (dump_flags & TDF_DETAILS))
5378         flow_loop_dump (loop, dump_file, NULL, 1);
5379
5380       tree_ssa_iv_optimize_loop (&data, loop);
5381     }
5382
5383   tree_ssa_iv_optimize_finalize (&data);
5384 }