/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variables value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
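
/* As an illustrative sketch (not part of the original sources), consider a
   loop

     for (i = 0; i < n; i++)
       a[i] = b[i];

   Here the addresses &a[i] and &b[i] and the comparison i < n are the
   interesting uses, while i itself and pointer ivs derived from &a[i] and
   &b[i] are among the candidates.  Selecting the pointer candidates yields
   the classical strength reduction, conceptually rewriting the loop to

     for (p = &a[0], q = &b[0]; p < &a[n]; p++, q++)
       *p = *q;

   which removes the multiplication i * sizeof (*a) hidden in the
   addressing.  */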

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  unsigned inv_id;      /* Id of an invariant.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  unsigned cost;        /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
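
/* The two constants above serve as the identity and absorbing elements of
   the cost computations below: a (use, candidate) pair that cannot be
   expressed at all is assigned infinite_cost, which infinite_cost_p later
   recognizes by comparing the runtime cost against INFTY.  */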

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};

/* Use of an induction variable.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  tree stmt;            /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  enum iv_position pos; /* Where it is computed.  */
  tree incremented_at;  /* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
        return false;
      if (bsi_stmt (bsi) == stmt)
        return true;
    }
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
                                     exit, &desc, true)
          && integer_zerop (desc.may_be_zero)
          && !contains_abnormal_ssa_name_p (desc.niter))
        niter = desc.niter;
      else
        niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (TREE_CODE (expr) == NOP_EXPR
      || TREE_CODE (expr) == CONVERT_EXPR)
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
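
/* A few illustrative cases of the above (hypothetical inputs): for
   EXPR = &a[0] the function returns the address of the underlying object a;
   for a pointer p of unknown origin, casts are looked through and p itself
   is returned by the default case; and for a non-pointer expression
   NULL_TREE is returned, since it points to no object.  */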

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = fold_convert (sizetype, step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  lhs = GIMPLE_STMT_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter, TDF_SLIM);
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            tree stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
              || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition *COND_P, checks whether it is a compare of an induction
   variable and an invariant.  If this is the case, CONTROL_VAR is set
   to location of the iv, BOUND to the location of the invariant,
   IV_VAR and IV_BOUND are set to the corresponding induction variable
   descriptions, and true is returned.  If this is not the case,
   CONTROL_VAR and BOUND are set to the arguments of the condition and
   false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, tree *cond_p,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The nodes returned when COND has just one operand.  Note that you should
     not modify anything in BOUND or IV_BOUND because of this.  */
  static struct iv const_iv;
  static tree zero;
  tree cond = *cond_p;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (cond) == SSA_NAME)
    {
      op0 = cond_p;
      iv0 = get_iv (data, cond);
      ret = (iv0 && !integer_zerop (iv0->step));
      goto end;
    }

  if (!COMPARISON_CLASS_P (cond))
    goto end;

  op0 = &TREE_OPERAND (cond, 0);
  op1 = &TREE_OPERAND (cond, 1);
  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (bound)
    *bound = op1;
  if (iv_var)
    *iv_var = iv0;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  tree stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
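
/* Two hypothetical examples of the above: for TOP = i * 4 and BOT = i, the
   MULT_EXPR case recurses on i (equal to BOT, giving 1) and multiplies by
   the constant 4, so *MUL is set to 4 and true is returned.  For TOP = 12
   and BOT = 5, the INTEGER_CST case computes 12 = 2 * 5 + 2 with nonzero
   remainder, so false is returned.  */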

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      double_int mul;
      tree al = build_int_cst (TREE_TYPE (step),
                               GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);

      if (base_align < GET_MODE_ALIGNMENT (mode)
          || bitpos % GET_MODE_ALIGNMENT (mode) != 0
          || bitpos % BITS_PER_UNIT != 0)
        return true;

      if (!constant_multiple_of (step, al, &mul))
        return true;
    }

  return false;
}
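
/* For instance, on a STRICT_ALIGNMENT target a 4-byte access whose step is
   only 2 bytes fails the constant_multiple_of (step, al, &mul) test above,
   so the reference is conservatively reported as possibly unaligned.  */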

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || is_gimple_min_invariant (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case CONVERT_EXPR:
    case NON_LVALUE_EXPR:
    case NOP_EXPR:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == INDIRECT_REF)
            *ref = fold_indirect_ref (*ref);
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
    {
      lhs = GIMPLE_STMT_OPERAND (stmt, 0);
      rhs = GIMPLE_STMT_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
        {
        case tcc_comparison:
          find_interesting_uses_cond (data, stmt,
                                      &GIMPLE_STMT_OPERAND (stmt, 1));
          return;

        case tcc_reference:
          find_interesting_uses_address (data, stmt,
                                         &GIMPLE_STMT_OPERAND (stmt, 1));
          if (REFERENCE_CLASS_P (lhs))
            find_interesting_uses_address (data, stmt,
                                           &GIMPLE_STMT_OPERAND (stmt, 0));
          return;

        default: ;
        }

      if (REFERENCE_CLASS_P (lhs)
          && is_gimple_val (rhs))
        {
          find_interesting_uses_address (data, stmt,
                                         &GIMPLE_STMT_OPERAND (stmt, 0));
          find_interesting_uses_op (data, rhs);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  tree phi, def;

  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
        find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
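
/* An illustrative example: for EXPR = p + 8 the function returns p and
   stores 8 to *OFFSET; for EXPR = &a[4] with 4-byte array elements it
   stores 16 to *OFFSET and returns the (folded) address of the array
   itself.  */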

/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
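
/* Informally: 'int' is mapped to 'unsigned int', an already unsigned type
   is returned unchanged, and a pointer type is mapped to an unsigned
   integer type of the same precision.  */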

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation of the
   value.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      /* Don't convert the base to the generic type for pointers as the generic
         type is an integer type with the same size as the pointer type.  */
      if (type != orig_type && !POINTER_TYPE_P (orig_type))
        {
          base = fold_convert (type, base);
          step = fold_convert (type, step);
        }
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at)
        continue;

      if (!cand->iv)
        {
          if (!base && !step)
            break;

          continue;
        }

      if (!base && !step)
        continue;

      if (operand_equal_p (base, cand->iv->base, 0)
          && operand_equal_p (step, cand->iv->step, 0))
        break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
        cand->iv = NULL;
      else
        cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
          && TREE_CODE (step) != INTEGER_CST)
        {
          fd_ivopts_data = data;
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}
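
/* Note that the loop over the existing candidates above makes the function
   idempotent: registering the same (base, step, position) combination twice
   yields the same candidate, possibly just promoting it to "important" or
   linking it to another use.  */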

/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
               tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
}

/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
                                     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
                 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}

/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  tree phi, def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
                 build_int_cst (TREE_TYPE (iv->base), 0),
                 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (TREE_CODE (phi) == PHI_NODE)
    {
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
                              iv->base, iv->step, true, IP_ORIGINAL, NULL,
                              SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}

/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
        add_old_iv_candidates (data, iv);
    }
}

/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  */
  base = strip_offset (iv->base, &offset);
  if (offset)
    add_candidate (data, base, iv->step, false, use);
}

/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}

/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}

/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}

/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
        size = n_iv_cands (data);
      else
        {
          s = 0;
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              s++;
            }

          /* Round up to a power of two, so that taking the modulus is fast.  */
          for (size = 1; size < s; size <<= 1)
            continue;
        }

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
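
/* For example, a use with 5 related candidates gets a cost map of size 8,
   the next power of two, so that set_use_iv_cost and get_use_iv_cost below
   can reduce a candidate id with the cheap "id & (n_map_members - 1)"
   instead of a division.  */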

/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}

/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}
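
/* E.g. compare_costs ({3, 9}, {4, 0}) is negative because the runtime cost
   dominates, whereas compare_costs ({4, 2}, {4, 1}) is positive since
   complexity only breaks ties.  */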

/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}

/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  */

static void
set_use_iv_cost (struct ivopts_data *data,
                 struct iv_use *use, struct iv_cand *cand,
                 comp_cost cost, bitmap depends_on, tree value)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
}

/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
                 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
        return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}

/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
        cost += rtx_cost (set, SET);
      else
        cost++;
    }

  return cost;
}

/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (Pmode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (Pmode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
    }

  return x;
}

/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
           handled_component_p (*expr_p);
           expr_p = &TREE_OPERAND (*expr_p, 0))
        continue;
      obj = *expr_p;
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
        break;

      if (DECL_MODE (obj) == BLKmode)
        x = produce_memory_decl_rtl (obj, regno);
      else
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;

  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));

  return cost;
}

/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}

/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */

int
tree_int_cst_sign_bit (const_tree t)
{
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
  unsigned HOST_WIDE_INT w;

  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = TREE_INT_CST_LOW (t);
  else
    {
      w = TREE_INT_CST_HIGH (t);
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (w >> bitno) & 1;
}
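/* Illustrative example (editor's sketch): on a host where
   HOST_BITS_PER_WIDE_INT is 64, a 16-bit type gives bitno == 15, which
   falls into the low word, so the constant 0x8000 yields
   (0x8000 >> 15) & 1 == 1; only types wider than a HOST_WIDE_INT consult
   TREE_INT_CST_HIGH.  */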
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if ((TREE_CODE (*a) == NOP_EXPR
       || TREE_CODE (*a) == CONVERT_EXPR))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
        return atype;
    }
  else
    return atype;

  if ((TREE_CODE (*b) == NOP_EXPR
       || TREE_CODE (*b) == CONVERT_EXPR))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
        return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}
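/* Illustrative example (editor's sketch): if A is (unsigned) a64 and B is
   (unsigned) b64, with a64 and b64 both of the same 64-bit type, *a and *b
   are replaced by a64 and b64 and the 64-bit type is returned, so that the
   difference A - B can be folded in the wide type before the final cast
   back to the narrow unsigned type in get_computation_aff.  */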
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, tree at,
                     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
        cstep_common = fold_convert (common_type, cstep);
      else
        cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, double_int_neg (rat));
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
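/* Illustrative example (editor's sketch): for a use with base 16 and step 8
   based on a candidate with base 0 and step 4, the ratio is 2 and the
   affine combination built above is use = 16 - 2 * 0 + 2 * var, i.e. the
   use is recovered from the candidate variable as 2 * var + 16.  */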
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
                    struct iv_use *use, struct iv_cand *cand, tree at)
{
  aff_tree aff;
  tree type = TREE_TYPE (use->iv->base);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}

/* Determines the expression by which USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
/* Returns cost of addition in MODE.  */

static unsigned
add_cost (enum machine_mode mode)
{
  static unsigned costs[NUM_MACHINE_MODES];
  rtx seq;
  unsigned cost;

  if (costs[mode])
    return costs[mode];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
                 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  if (!cost)
    cost = 1;

  costs[mode] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
             GET_MODE_NAME (mode), cost);
  return cost;
}

/* Entry in a hashtable of already known costs for multiplication.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;            /* The constant to multiply by.  */
  enum machine_mode mode;       /* In mode.  */
  unsigned cost;                /* The cost.  */
};

/* Counts hash value for the ENTRY.  */

static hashval_t
mbc_entry_hash (const void *entry)
{
  const struct mbc_entry *e = (const struct mbc_entry *) entry;

  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
}

/* Compares the hash table entries ENTRY1 and ENTRY2.  */

static int
mbc_entry_eq (const void *entry1, const void *entry2)
{
  const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
  const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;

  return (e1->mode == e2->mode
          && e1->cst == e2->cst);
}
/* Returns cost of multiplication by constant CST in MODE.  */

unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  *cached = XNEW (struct mbc_entry);
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
               gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
             (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE.  */

bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
{
#define MAX_RATIO 128
  static sbitmap valid_mult[MAX_MACHINE_MODE];

  if (!valid_mult[mode])
    {
      rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult[mode]);
      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
        {
          XEXP (addr, 1) = gen_int_mode (i, Pmode);
          if (memory_address_p (mode, addr))
            SET_BIT (valid_mult[mode], i + MAX_RATIO);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "  allowed multipliers:");
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
            if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
              fprintf (dump_file, " %d", (int) i);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
}
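/* Illustrative example (editor's sketch): the sbitmap has 2 * MAX_RATIO + 1
   bits indexed by ratio + MAX_RATIO, so ratio -128 maps to bit 0, ratio 0
   to bit 128 and ratio 128 to bit 256; on targets with scaled-index
   addressing such as i386, the probes for ratios 1, 2, 4 and 8 are
   typically the ones that succeed.  */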
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE.

   TODO -- there must be some better way.  This all is quite crude.  */

static comp_cost
get_address_cost (bool symbol_present, bool var_present,
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
                  enum machine_mode mem_mode)
{
  static bool initialized[MAX_MACHINE_MODE];
  static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
  static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
  static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
  unsigned cost, acost, complexity;
  bool offset_p, ratio_p;
  HOST_WIDE_INT s_offset;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (!initialized[mem_mode])
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
      int old_cse_not_expected;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;
      rtx reg0, reg1;

      initialized[mem_mode] = true;

      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);

      addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
      for (i = start; i <= 1 << 20; i <<= 1)
        {
          XEXP (addr, 1) = gen_int_mode (i, Pmode);
          if (!memory_address_p (mem_mode, addr))
            break;
        }
      max_offset[mem_mode] = i == start ? 0 : i >> 1;
      off[mem_mode] = max_offset[mem_mode];

      for (i = start; i <= 1 << 20; i <<= 1)
        {
          XEXP (addr, 1) = gen_int_mode (-i, Pmode);
          if (!memory_address_p (mem_mode, addr))
            break;
        }
      min_offset[mem_mode] = i == start ? 0 : -(i >> 1);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "get_address_cost:\n");
          fprintf (dump_file, "  min offset %s %d\n",
                   GET_MODE_NAME (mem_mode),
                   (int) min_offset[mem_mode]);
          fprintf (dump_file, "  max offset %s %d\n",
                   GET_MODE_NAME (mem_mode),
                   (int) max_offset[mem_mode]);
        }

      rat[mem_mode] = 1;
      for (i = 2; i <= MAX_RATIO; i++)
        if (multiplier_allowed_in_address_p (i, mem_mode))
          {
            rat[mem_mode] = i;
            break;
          }

      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);

      for (i = 0; i < 16; i++)
        {
          sym_p = i & 1;
          var_p = (i >> 1) & 1;
          off_p = (i >> 2) & 1;
          rat_p = (i >> 3) & 1;

          addr = reg0;
          if (rat_p)
            addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
                                   gen_int_mode (rat[mem_mode], Pmode));

          if (var_p)
            addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);

          if (sym_p)
            {
              base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
              /* ??? We can run into trouble with some backends by presenting
                 it with symbols which haven't been properly passed through
                 targetm.encode_section_info.  By setting the local bit, we
                 enhance the probability of things working.  */
              SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

              if (off_p)
                base = gen_rtx_fmt_e (CONST, Pmode,
                                      gen_rtx_fmt_ee (PLUS, Pmode,
                                                      base,
                                                      gen_int_mode (off[mem_mode],
                                                                    Pmode)));
            }
          else if (off_p)
            base = gen_int_mode (off[mem_mode], Pmode);
          else
            base = NULL_RTX;

          if (base)
            addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);

          start_sequence ();
          /* To avoid splitting addressing modes, pretend that no cse will
             follow.  */
          old_cse_not_expected = cse_not_expected;
          cse_not_expected = true;
          addr = memory_address (mem_mode, addr);
          cse_not_expected = old_cse_not_expected;
          seq = get_insns ();
          end_sequence ();

          acost = seq_cost (seq);
          acost += address_cost (addr, mem_mode);

          if (!acost)
            acost = 1;
          costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
        }

      /* On some targets, it is quite expensive to load symbol to a register,
         which makes addresses that contain symbols look much more expensive.
         However, the symbol will have to be loaded in any case before the
         loop (and quite likely we have it in register already), so it does not
         make much sense to penalize them too heavily.  So make some final
         tweaks for the SYMBOL_PRESENT modes:

         If VAR_PRESENT is false, and the mode obtained by changing symbol to
         var is cheaper, use this mode with small penalty.
         If VAR_PRESENT is true, try whether the mode with
         SYMBOL_PRESENT = false is cheaper even with cost of addition, and
         if this is the case, use it.  */
      add_c = add_cost (Pmode);
      for (i = 0; i < 8; i++)
        {
          var_p = i & 1;
          off_p = (i >> 1) & 1;
          rat_p = (i >> 2) & 1;

          acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
          if (var_p)
            acost += add_c;

          if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
            costs[mem_mode][1][var_p][off_p][rat_p] = acost;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Address costs:\n");

          for (i = 0; i < 16; i++)
            {
              sym_p = i & 1;
              var_p = (i >> 1) & 1;
              off_p = (i >> 2) & 1;
              rat_p = (i >> 3) & 1;

              fprintf (dump_file, "  ");
              if (sym_p)
                fprintf (dump_file, "sym + ");
              if (var_p)
                fprintf (dump_file, "var + ");
              if (off_p)
                fprintf (dump_file, "cst + ");
              if (rat_p)
                fprintf (dump_file, "rat * ");

              acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
              fprintf (dump_file, "index costs %d\n", acost);
            }
          fprintf (dump_file, "\n");
        }
    }

  bits = GET_MODE_BITSIZE (Pmode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  cost = 0;
  offset_p = (s_offset != 0
              && min_offset[mem_mode] <= s_offset
              && s_offset <= max_offset[mem_mode]);
  ratio_p = (ratio != 1
             && multiplier_allowed_in_address_p (ratio, mem_mode));

  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, Pmode);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (Pmode);

  acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
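/* Illustrative example (editor's sketch): the 16 precomputed combinations
   are indexed by the bits of i in the loop above, sym_p == i & 1,
   var_p == (i >> 1) & 1, off_p == (i >> 2) & 1 and rat_p == (i >> 3) & 1,
   so i == 13 (binary 1101) prices the shape sym + cst + rat * index; the
   final lookup simply reassembles these four flags.  */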
/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr)
{
  static bool costs_initialized = false;
  static unsigned integer_cost;
  static unsigned symbol_cost;
  static unsigned address_cost;
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      integer_cost = computation_cost (build_int_cst (integer_type_node,
                                                      2000));

      addr = build1 (ADDR_EXPR, type, var);
      symbol_cost = computation_cost (addr) + 1;

      address_cost
        = computation_cost (build2 (POINTER_PLUS_EXPR, type,
                                    addr,
                                    build_int_cst (sizetype, 2000))) + 1;
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "force_expr_to_var_cost:\n");
          fprintf (dump_file, "  integer %d\n", (int) integer_cost);
          fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
          fprintf (dump_file, "  address %d\n", (int) address_cost);
          fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
          fprintf (dump_file, "\n");
        }

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return zero_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
        return new_cost (integer_cost, 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
        {
          tree obj = TREE_OPERAND (expr, 0);

          if (TREE_CODE (obj) == VAR_DECL
              || TREE_CODE (obj) == PARM_DECL
              || TREE_CODE (obj) == RESULT_DECL)
            return new_cost (symbol_cost, 0);
        }

      return new_cost (address_cost, 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
        cost0 = zero_cost;
      else
        cost0 = force_expr_to_var_cost (op0);

      if (is_gimple_val (op1))
        cost1 = zero_cost;
      else
        cost1 = force_expr_to_var_cost (op1);

      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost, 0);
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      cost = new_cost (add_cost (mode), 0);
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
        cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
      else if (cst_and_fits_in_hwi (op1))
        cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
      else
        return new_cost (target_spill_cost, 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > target_spill_cost)
    cost.cost = target_spill_cost;

  return cost;
}
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
                tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr);
}

/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
                    tree addr, bool *symbol_present, bool *var_present,
                    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost, 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return zero_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return zero_cost;
}
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
                     tree e1, tree e2, bool *symbol_present, bool *var_present,
                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  comp_cost cost;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return zero_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
                               symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  cost = force_var_cost (data, e1, depends_on);
  cost = add_costs (cost, force_var_cost (data, e2, depends_on));
  cost.cost += add_cost (Pmode);

  return cost;
}
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
                 tree e1, tree e2, bool *symbol_present, bool *var_present,
                 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  comp_cost cost;
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
                                depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return zero_cost;
    }
  *var_present = true;
  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      cost = force_var_cost (data, e2, depends_on);
      cost.cost += multiply_by_cost (-1, mode);

      return cost;
    }

  cost = force_var_cost (data, e1, depends_on);
  cost = add_costs (cost, force_var_cost (data, e2, depends_on));
  cost.cost += add_cost (mode);

  return cost;
}
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at which the value is computed.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
                         struct iv_use *use, struct iv_cand *cand,
                         bool address_p, bitmap *depends_on, tree at)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present;
  comp_cost cost;
  unsigned n_sums;
  double_int rat;

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p)
    {
      /* Do not try to express address of an object with computation based
         on address of a different object.  This may cause problems in rtl
         level alias analysis (that does not expect this to be happening,
         as this is illegal in C), and would be unlikely to be useful
         anyway.  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return infinite_cost;
    }

  if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not too unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (double_int_fits_in_shwi_p (rat))
    ratio = double_int_to_shwi (rat);
  else
    return infinite_cost;

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (this also holds in the case ratio == -1, TODO).  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost = difference_cost (data,
                              ubase, build_int_cst (utype, 0),
                              &symbol_present, &var_present, &offset,
                              depends_on);
    }
  else if (ratio == 1)
    {
      cost = difference_cost (data,
                              ubase, cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost.cost += add_cost (TYPE_MODE (ctype));
      cost = add_costs (cost,
                        difference_cost (data,
                                         ubase, build_int_cst (utype, 0),
                                         &symbol_present, &var_present,
                                         &offset, depends_on));
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_after_increment (data->current_loop, cand, at))
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var/const parts may be omitted).  If we are looking for an address,
     find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost, get_address_cost (symbol_present, var_present,
                                              offset, ratio,
                                              TYPE_MODE (TREE_TYPE (*use->op_p))));

  /* Otherwise estimate the costs for computing the expression.  */
  aratio = ratio > 0 ? ratio : -ratio;
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
        cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));

      return cost;
    }

  if (aratio != 1)
    cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));

  n_sums = 1;
  if (var_present
      /* Symbol + offset should be compile-time computable.  */
      && (symbol_present || offset))
    n_sums++;

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += n_sums * add_cost (TYPE_MODE (ctype));
  return cost;

fallback:
  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);

    return new_cost (computation_cost (comp), 0);
  }
}
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand,
                      bool address_p, bitmap *depends_on)
{
  return get_computation_cost_at (data,
                                  use, cand, address_p, depends_on, use->stmt);
}

/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  comp_cost cost;
  bitmap depends_on;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on);
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return !infinite_cost_p (cost);
}

/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return !infinite_cost_p (cost);
}
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
               aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;

  tree_to_aff_combination (iv->step, steptype, &step);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  aff_combination_add (val, &delta);
}

/* Returns period of induction variable IV.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  /* Period of the iv is gcd (step, type range).  Since type range is power
     of two, it suffices to determine the maximum power of two that divides
     the step.  */
  pow2div = num_ending_zeros (step);
  type = unsigned_type_for (TREE_TYPE (step));

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_low_cst (pow2div, 1)));

  return period;
}
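/* Illustrative example (editor's sketch): for a 32-bit iv with step 4,
   num_ending_zeros is 2 and the period is the low-bits mask of
   32 - 2 == 30 bits, i.e. 2^30 - 1; for that many further increments the
   iv is guaranteed not to return to a value it already had.  */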
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = bb_for_stmt (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}

/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound)
{
  basic_block ex_bb;
  edge exit;
  tree nit, period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  double_int period_value, max_niter;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.  TODO -- extend
     for other conditions inside loop body.  */
  ex_bb = bb_for_stmt (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || TREE_CODE (use->stmt) != COND_EXPR)
    return false;
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  nit = niter_for_exit (data, exit);
  if (!nit)
    return false;

  /* Determine whether we may use the variable to test whether niter iterations
     elapsed.  This is the case iff the period of the induction variable is
     greater than the number of iterations.  */
  period = iv_period (cand->iv);

  /* Compare the period with the estimate on the number of iterations of the
     loop.  */
  if (!estimated_loop_iterations (loop, true, &max_niter))
    return false;
  period_value = tree_to_double_int (period);
  if (double_int_ucmp (period_value, max_niter) <= 0)
    return false;

  cand_value_at (loop, cand, use->stmt, nit, &bnd);
  *bound = aff_combination_to_tree (&bnd);
  return true;
}
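/* Illustrative example (editor's sketch): for a loop exited by i < n and a
   candidate p with base BASE and step 4, cand_value_at yields the
   loop-invariant bound BASE + 4 * n, so the exit test may be rewritten as
   a comparison of p against this bound -- provided the period check above
   guarantees p cannot wrap before n iterations elapse.  */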
/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost;
  bool ok;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      /* The bound is a loop invariant, so it will be only computed
         once.  */
      elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
  gcc_assert (ok);

  express_cost = get_computation_cost (data, use, cand, false,
                                       &depends_on_express);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Choose the better approach.  */
  if (compare_costs (elim_cost, express_cost) < 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}

/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
        {
          for (j = 0; j < n_iv_cands (data); j++)
            {
              cand = iv_cand (data, j);
              determine_use_iv_cost (data, use, cand);
            }
        }
      else
        {
          bitmap_iterator bi;

          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              cand = iv_cand (data, j);
              if (!determine_use_iv_cost (data, use, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for which the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (use->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);

          fprintf (dump_file, "Use %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
          for (j = 0; j < use->n_map_members; j++)
            {
              if (!use->cost_map[j].cand
                  || infinite_cost_p (use->cost_map[j].cost))
                continue;

              fprintf (dump_file, "  %d\t%d\t%d\t",
                       use->cost_map[j].cand->id,
                       use->cost_map[j].cost.cost,
                       use->cost_map[j].cost.complexity);
              if (use->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              use->cost_map[j].depends_on, "","");
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}
/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));

  cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
}
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}

/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used);
}
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  tree phi, op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  /* We use the following model (definitely improvable, especially the
     cost function -- TODO):

     We estimate the number of registers available (using MD data), name it A.

     We estimate the number of registers used by the loop, name it U.  This
     number is obtained as the number of loop phi nodes (not counting virtual
     registers and bivs) + the number of variables from outside of the loop.

     We set a reserve R (free regs that are used for temporary computations,
     etc.).  For now the reserve is a constant 3.

     Let I be the number of induction variables.

     -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we do not
        encourage making a lot of ivs without a reason).
     -- if A - R < U + I <= A, the cost is I * PRES_COST
     -- if U + I > A, the cost is I * PRES_COST and
        number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
    }

  n = 0;
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
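/* Illustrative example (editor's sketch of the model above): with A == 6
   available registers, reserve R == 3 and U == 2 registers live in the
   loop, a set of I == 1 ivs satisfies U + I + R <= A and is billed only the
   small per-iv cost, while I == 5 pushes U + I past A and additionally
   pays the spill term proportional to (U + I - A) / (U + I).  */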
/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}

/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;
  cost.cost += ivs->cand_cost;
  cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);

  ivs->cost = cost;
}
/* Removes the invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}
/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);
  iv_ca_recount_cost (data, ivs);
}

/* Adds the invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}
/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->n_cands++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);
      iv_ca_recount_cost (data, ivs);
    }
}

/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cp = get_use_iv_cost (data, use, iv_cand (data, i));

      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}
/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  return (ivs->bad_uses ? infinite_cost : ivs->cost);
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}
/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}
/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
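/* Illustrative example (editor's sketch): committing a delta
   use0: NULL -> cp_a, use1: cp_b -> cp_c forward installs cp_a and cp_c;
   reverting the same delta first reverses the list and swaps each
   old_cp/new_cp pair, so the walk reinstalls NULL for use0 and cp_b for
   use1, restoring the original assignment.  */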
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}
/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = zero_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = zero_cost;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free (*ivs);
  *ivs = NULL;
}
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost %d (complexity %d)\n", cost.cost, cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates ","\n");

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }

  fprintf (file, "\n");
}
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;
              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;
              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the set IVS in the best possible way in order
   to express the USE.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important && cand->iv->base_object == NULL_TREE)
            continue;

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              if (best_delta)
                iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
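/* Illustrative example (editor's sketch): starting from a set {c0} of cost
   20, the loop above may find that adding c3 (and then pruning any ivs
   made redundant by it) yields cost 15; find_optimal_iv_set below keeps
   calling try_improve_iv_set until no single extension or pruning lowers
   the cost any further -- a simple greedy descent.  */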
/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      comp_cost cost = iv_ca_cost (set);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
    }

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  block_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = bsi_last (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = bsi_last (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }
}
/* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
   is true, remove also the ssa name defined by the statement.  */

static void
remove_statement (tree stmt, bool including_defined_name)
{
  if (TREE_CODE (stmt) == PHI_NODE)
    {
      remove_phi_node (stmt, NULL_TREE, including_defined_name);
    }
  else
    {
      block_stmt_iterator bsi = bsi_for_stmt (stmt);

      bsi_remove (&bsi, true);
      release_defs (stmt);
    }
}
4993 /* Rewrites USE (definition of iv used in a nonlinear expression)
4994 using candidate CAND. */
static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt, ass;
  block_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR;

      gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
      gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
        {
          incr_code = MINUS_EXPR;
          step = TREE_OPERAND (step, 0);
        }

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      op = GIMPLE_STMT_OPERAND (use->stmt, 1);
      if (TREE_CODE (op) == PLUS_EXPR
          || TREE_CODE (op) == MINUS_EXPR
          || TREE_CODE (op) == POINTER_PLUS_EXPR)
        {
          if (TREE_OPERAND (op, 0) == cand->var_before)
            op = TREE_OPERAND (op, 1);
          else if (TREE_CODE (op) != MINUS_EXPR
                   && TREE_OPERAND (op, 1) == cand->var_before)
            op = TREE_OPERAND (op, 0);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op
          && (TREE_CODE (op) == INTEGER_CST
              || operand_equal_p (op, step, 0)))
        return;

      /* Otherwise, add the necessary computations to express
         the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
                           build2 (incr_code, ctype, op,
                                   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = bsi_after_labels (bb_for_stmt (use->stmt));
      break;

    case GIMPLE_MODIFY_STMT:
      tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand_bsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
                                 true, BSI_SAME_STMT);

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      ass = build_gimple_modify_stmt (tgt, op);
      bsi_insert_before (&bsi, ass, BSI_SAME_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
}
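
/* An example of the special case handled at the top of this function: for
   the original increment

     i_after = i_before + 1;

   with CAND being the original iv incremented at this very statement, the
   statement is left untouched (the early return above), since expressing
   the candidate by itself would only introduce useless conversions.  */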
/* Replaces ssa name in index IDX by its basic variable.  Callback for
   for_each_index.  */
static bool
idx_remove_ssa_names (tree base, tree *idx,
                      void *data ATTRIBUTE_UNUSED)
{
  tree *op;

  if (TREE_CODE (*idx) == SSA_NAME)
    *idx = SSA_NAME_VAR (*idx);

  if (TREE_CODE (base) == ARRAY_REF)
    {
      op = &TREE_OPERAND (base, 2);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
      op = &TREE_OPERAND (base, 3);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
    }

  return true;
}
/* Unshares REF and replaces ssa names inside it by their basic variables.  */
static tree
unshare_and_remove_ssa_names (tree ref)
{
  ref = unshare_expr (ref);
  for_each_index (&ref, idx_remove_ssa_names, NULL);

  return ref;
}
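
/* For example, a reference a[i_13] (i_13 being an ssa name of the variable
   i) is unshared and rewritten to a[i].  The result is not executable code;
   it is only recorded in TMR_ORIGINAL below, so that the original form of
   the reference is preserved for the alias machinery.  (The name i_13 is
   just illustrative.)  */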
/* Extract the alias analysis info for the memory reference REF.  The way
   this information is stored and what precisely its semantics are depend on
   the type of the reference, but there is always a single _DECL node hidden
   somewhere that determines the set of virtual operands for the reference.
   The code below deciphers this jungle and extracts that single useful piece
   of information.  */
static tree
get_ref_tag (tree ref, tree orig)
{
  tree var = get_base_address (ref);
  tree aref = NULL_TREE, tag, sv;
  HOST_WIDE_INT offset, size, maxsize;

  for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
    aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);

  if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
    return aref;

  if (!var)
    return NULL_TREE;

  if (TREE_CODE (var) == INDIRECT_REF)
    {
      /* If the base is a dereference of a pointer, first check its name memory
         tag.  If it does not have one, use its symbol memory tag.  */
      var = TREE_OPERAND (var, 0);
      if (TREE_CODE (var) != SSA_NAME)
        return NULL_TREE;

      if (SSA_NAME_PTR_INFO (var))
        {
          tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
          if (tag)
            return tag;
        }

      var = SSA_NAME_VAR (var);
      tag = symbol_mem_tag (var);
      gcc_assert (tag != NULL_TREE);
      return tag;
    }
  else
    {
      if (!DECL_P (var))
        return NULL_TREE;

      tag = symbol_mem_tag (var);
      if (tag)
        return tag;

      return var;
    }
}
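
/* For instance, for a use *p_7 (p_7 an ssa name of the pointer p), the tag
   is the name memory tag of p_7 if points-to analysis assigned one, and the
   symbol memory tag of p otherwise; for a direct access like v.f, the tag
   comes from the declaration of v itself.  (The names are illustrative.)  */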
/* Copies the reference information from OLD_REF to NEW_REF.  */
static void
copy_ref_info (tree new_ref, tree old_ref)
{
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
    copy_mem_ref_info (new_ref, old_ref);
  else
    {
      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
      TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
    }
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */
static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  tree ref;
  bool ok;

  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
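
/* E.g. an address use *(&a[0] + i * 4) rewritten using a pointer candidate
   p becomes a TARGET_MEM_REF such as MEM[base: p_42]; the exact shape (base,
   index, step and offset operands) is chosen by create_mem_ref to match the
   addressing modes of the target.  This is only a sketch; the names are
   illustrative.  */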
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */
static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);

      compare = iv_elimination_compare (data, use);
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE,
                                     true, BSI_SAME_STMT);

      *use->op_p = build2 (compare, boolean_type_node, var, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     iv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, BSI_SAME_STMT);
}
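
/* A sketch of successful elimination: an exit test

     if (i_3 < 100)

   rewritten using a pointer candidate p with step 4 may become

     if (p_8 != base + 400)

   where the bound is the value produced by the elimination machinery
   (cp->value) and the comparison code comes from iv_elimination_compare.
   The concrete trees are illustrative only.  */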
/* Rewrites USE using candidate CAND.  */
static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  push_stmt_changes (&use->stmt);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  pop_stmt_changes (&use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */
static void
rewrite_uses (struct ivopts_data *data)
{
  struct iv_cand *cand;
  struct iv_use *use;
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */
static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
    }
}
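
/* For example, once every use of the original counter i has been rewritten
   to use a new pointer iv, the ssa names of i are no longer needed; because
   their version_info has neither have_use_for nor preserve_biv set, the
   defining statements (and the ssa names themselves) are removed here.  */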
/* Frees data allocated by the optimization of a single loop.  */
static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
        free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
        free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_id = 0;

  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}
/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}
/* Optimizes the LOOP.  Returns true if anything changed.  */
static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  gcc_assert (!data->niters);
  data->current_loop = loop;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
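
/* To illustrate the whole pipeline on a (hypothetical) example: a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   may, on a target that prefers pointer addressing, be rewritten into the
   equivalent of

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   -- the index iv is strength-reduced to a pointer candidate, the exit test
   is eliminated in favor of a comparison of the pointer against its final
   value, and the then-unused counter i is removed.  */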
/* Main entry point.  Optimizes induction variables in loops.  */
void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  loop_iterator li;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}