gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
   3    Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 3, or (at your option) any
  10 later version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "pointer-set.h"
  87 #include "hashtab.h"
  88 #include "tree-chrec.h"
  89 #include "tree-scalar-evolution.h"
  90 #include "cfgloop.h"
  91 #include "params.h"
  92 #include "langhooks.h"
  93 #include "tree-affine.h"
  94 #include "target.h"
  95
  96 /* The infinite cost.  */
  97 #define INFTY 10000000
  98
  99 /* The expected number of loop iterations.  TODO -- use profiling instead of
 100    this.  */
 101 #define AVG_LOOP_NITER(LOOP) 5
 102
 103
 104 /* Representation of the induction variable.  */
 105 struct iv
 106 {
 107   tree base;            /* Initial value of the iv.  */
 108   tree base_object;     /* A memory object to that the induction variable points.  */
 109   tree step;            /* Step of the iv (constant only).  */
 110   tree ssa_name;        /* The ssa name with the value.  */
 111   bool biv_p;           /* Is it a biv?  */
 112   bool have_use_for;    /* Do we already have a use for it?  */
 113   unsigned use_id;      /* The identifier in the use if it is the case.  */
 114 };
 115
 116 /* Per-ssa version information (induction variable descriptions, etc.).  */
 117 struct version_info
 118 {
 119   tree name;            /* The ssa name.  */
 120   struct iv *iv;        /* Induction variable description.  */
 121   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 122                            an expression that is not an induction variable.  */
 123   unsigned inv_id;      /* Id of an invariant.  */
 124   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 125 };
 126
 127 /* Types of uses.  */
 128 enum use_type
 129 {
 130   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 131   USE_ADDRESS,          /* Use in an address.  */
 132   USE_COMPARE           /* Use is a compare.  */
 133 };
 134
 135 /* Cost of a computation.  */
 136 typedef struct
 137 {
 138   unsigned cost;        /* The runtime cost.  */
 139   unsigned complexity;  /* The estimate of the complexity of the code for
 140                            the computation (in no concrete units --
 141                            complexity field should be larger for more
 142                            complex expressions and addressing modes).  */
 143 } comp_cost;
 144
 145 static const comp_cost zero_cost = {0, 0};
 146 static const comp_cost infinite_cost = {INFTY, INFTY};
 147
 148 /* The candidate - cost pair.  */
 149 struct cost_pair
 150 {
 151   struct iv_cand *cand; /* The candidate.  */
 152   comp_cost cost;       /* The cost.  */
 153   bitmap depends_on;    /* The list of invariants that have to be
 154                            preserved.  */
 155   tree value;           /* For final value elimination, the expression for
 156                            the final value of the iv.  For iv elimination,
 157                            the new bound to compare with.  */
 158 };
 159
 160 /* Use.  */
 161 struct iv_use
 162 {
 163   unsigned id;          /* The id of the use.  */
 164   enum use_type type;   /* Type of the use.  */
 165   struct iv *iv;        /* The induction variable it is based on.  */
 166   tree stmt;            /* Statement in that it occurs.  */
 167   tree *op_p;           /* The place where it occurs.  */
 168   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 169                            important ones.  */
 170
 171   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 172   struct cost_pair *cost_map;
 173                         /* The costs wrto the iv candidates.  */
 174
 175   struct iv_cand *selected;
 176                         /* The selected candidate.  */
 177 };
 178
 179 /* The position where the iv is computed.  */
 180 enum iv_position
 181 {
 182   IP_NORMAL,            /* At the end, just before the exit condition.  */
 183   IP_END,               /* At the end of the latch block.  */
 184   IP_ORIGINAL           /* The original biv.  */
 185 };
 186
 187 /* The induction variable candidate.  */
 188 struct iv_cand
 189 {
 190   unsigned id;          /* The number of the candidate.  */
 191   bool important;       /* Whether this is an "important" candidate, i.e. such
 192                            that it should be considered by all uses.  */
 193   enum iv_position pos; /* Where it is computed.  */
 194   tree incremented_at;  /* For original biv, the statement where it is
 195                            incremented.  */
 196   tree var_before;      /* The variable used for it before increment.  */
 197   tree var_after;       /* The variable used for it after increment.  */
 198   struct iv *iv;        /* The value of the candidate.  NULL for
 199                            "pseudocandidate" used to indicate the possibility
 200                            to replace the final value of an iv by direct
 201                            computation of the value.  */
 202   unsigned cost;        /* Cost of the candidate.  */
 203   bitmap depends_on;    /* The list of invariants that are used in step of the
 204                            biv.  */
 205 };
 206
 207 /* The data used by the induction variable optimizations.  */
 208
 209 typedef struct iv_use *iv_use_p;
 210 DEF_VEC_P(iv_use_p);
 211 DEF_VEC_ALLOC_P(iv_use_p,heap);
 212
 213 typedef struct iv_cand *iv_cand_p;
 214 DEF_VEC_P(iv_cand_p);
 215 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 216
 217 struct ivopts_data
 218 {
 219   /* The currently optimized loop.  */
 220   struct loop *current_loop;
 221
 222   /* Number of registers used in it.  */
 223   unsigned regs_used;
 224
 225   /* Numbers of iterations for all exits of the current loop.  */
 226   struct pointer_map_t *niters;
 227
 228   /* The size of version_info array allocated.  */
 229   unsigned version_info_size;
 230
 231   /* The array of information for the ssa names.  */
 232   struct version_info *version_info;
 233
 234   /* The bitmap of indices in version_info whose value was changed.  */
 235   bitmap relevant;
 236
 237   /* The maximum invariant id.  */
 238   unsigned max_inv_id;
 239
 240   /* The uses of induction variables.  */
 241   VEC(iv_use_p,heap) *iv_uses;
 242
 243   /* The candidates.  */
 244   VEC(iv_cand_p,heap) *iv_candidates;
 245
 246   /* A bitmap of important candidates.  */
 247   bitmap important_candidates;
 248
 249   /* Whether to consider just related and important candidates when replacing a
 250      use.  */
 251   bool consider_all_candidates;
 252 };
 253
 254 /* An assignment of iv candidates to uses.  */
 255
 256 struct iv_ca
 257 {
 258   /* The number of uses covered by the assignment.  */
 259   unsigned upto;
 260
 261   /* Number of uses that cannot be expressed by the candidates in the set.  */
 262   unsigned bad_uses;
 263
 264   /* Candidate assigned to a use, together with the related costs.  */
 265   struct cost_pair **cand_for_use;
 266
 267   /* Number of times each candidate is used.  */
 268   unsigned *n_cand_uses;
 269
 270   /* The candidates used.  */
 271   bitmap cands;
 272
 273   /* The number of candidates in the set.  */
 274   unsigned n_cands;
 275
 276   /* Total number of registers needed.  */
 277   unsigned n_regs;
 278
 279   /* Total cost of expressing uses.  */
 280   comp_cost cand_use_cost;
 281
 282   /* Total cost of candidates.  */
 283   unsigned cand_cost;
 284
 285   /* Number of times each invariant is used.  */
 286   unsigned *n_invariant_uses;
 287
 288   /* Total cost of the assignment.  */
 289   comp_cost cost;
 290 };
 291
 292 /* Difference of two iv candidate assignments.  */
 293
 294 struct iv_ca_delta
 295 {
 296   /* Changed use.  */
 297   struct iv_use *use;
 298
 299   /* An old assignment (for rollback purposes).  */
 300   struct cost_pair *old_cp;
 301
 302   /* A new assignment.  */
 303   struct cost_pair *new_cp;
 304
 305   /* Next change in the list.  */
 306   struct iv_ca_delta *next_change;
 307 };
 308
 309 /* Bound on number of candidates below that all candidates are considered.  */
 310
 311 #define CONSIDER_ALL_CANDIDATES_BOUND \
 312   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 313
 314 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 315    optimizing such a loop would help, and it would take ages).  */
 316
 317 #define MAX_CONSIDERED_USES \
 318   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 319
 320 /* If there are at most this number of ivs in the set, try removing unnecessary
 321    ivs from the set always.  */
 322
 323 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 324   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 325
 326 /* The list of trees for that the decl_rtl field must be reset is stored
 327    here.  */
 328
 329 static VEC(tree,heap) *decl_rtl_to_reset;
 330
 331 /* Number of uses recorded in DATA.  */
 332
 333 static inline unsigned
 334 n_iv_uses (struct ivopts_data *data)
 335 {
 336   return VEC_length (iv_use_p, data->iv_uses);
 337 }
 338
 339 /* Ith use recorded in DATA.  */
 340
 341 static inline struct iv_use *
 342 iv_use (struct ivopts_data *data, unsigned i)
 343 {
 344   return VEC_index (iv_use_p, data->iv_uses, i);
 345 }
 346
 347 /* Number of candidates recorded in DATA.  */
 348
 349 static inline unsigned
 350 n_iv_cands (struct ivopts_data *data)
 351 {
 352   return VEC_length (iv_cand_p, data->iv_candidates);
 353 }
 354
 355 /* Ith candidate recorded in DATA.  */
 356
 357 static inline struct iv_cand *
 358 iv_cand (struct ivopts_data *data, unsigned i)
 359 {
 360   return VEC_index (iv_cand_p, data->iv_candidates, i);
 361 }
 362
 363 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 364
 365 edge
 366 single_dom_exit (struct loop *loop)
 367 {
 368   edge exit = single_exit (loop);
 369
 370   if (!exit)
 371     return NULL;
 372
 373   if (!just_once_each_iteration_p (loop, exit->src))
 374     return NULL;
 375
 376   return exit;
 377 }
 378
 379 /* Dumps information about the induction variable IV to FILE.  */
 380
 381 extern void dump_iv (FILE *, struct iv *);
 382 void
 383 dump_iv (FILE *file, struct iv *iv)
 384 {
 385   if (iv->ssa_name)
 386     {
 387       fprintf (file, "ssa name ");
 388       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 389       fprintf (file, "\n");
 390     }
 391
 392   fprintf (file, "  type ");
 393   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 394   fprintf (file, "\n");
 395
 396   if (iv->step)
 397     {
 398       fprintf (file, "  base ");
 399       print_generic_expr (file, iv->base, TDF_SLIM);
 400       fprintf (file, "\n");
 401
 402       fprintf (file, "  step ");
 403       print_generic_expr (file, iv->step, TDF_SLIM);
 404       fprintf (file, "\n");
 405     }
 406   else
 407     {
 408       fprintf (file, "  invariant ");
 409       print_generic_expr (file, iv->base, TDF_SLIM);
 410       fprintf (file, "\n");
 411     }
 412
 413   if (iv->base_object)
 414     {
 415       fprintf (file, "  base object ");
 416       print_generic_expr (file, iv->base_object, TDF_SLIM);
 417       fprintf (file, "\n");
 418     }
 419
 420   if (iv->biv_p)
 421     fprintf (file, "  is a biv\n");
 422 }
 423
 424 /* Dumps information about the USE to FILE.  */
 425
 426 extern void dump_use (FILE *, struct iv_use *);
 427 void
 428 dump_use (FILE *file, struct iv_use *use)
 429 {
 430   fprintf (file, "use %d\n", use->id);
 431
 432   switch (use->type)
 433     {
 434     case USE_NONLINEAR_EXPR:
 435       fprintf (file, "  generic\n");
 436       break;
 437
 438     case USE_ADDRESS:
 439       fprintf (file, "  address\n");
 440       break;
 441
 442     case USE_COMPARE:
 443       fprintf (file, "  compare\n");
 444       break;
 445
 446     default:
 447       gcc_unreachable ();
 448     }
 449
 450   fprintf (file, "  in statement ");
 451   print_generic_expr (file, use->stmt, TDF_SLIM);
 452   fprintf (file, "\n");
 453
 454   fprintf (file, "  at position ");
 455   if (use->op_p)
 456     print_generic_expr (file, *use->op_p, TDF_SLIM);
 457   fprintf (file, "\n");
 458
 459   dump_iv (file, use->iv);
 460
 461   if (use->related_cands)
 462     {
 463       fprintf (file, "  related candidates ");
 464       dump_bitmap (file, use->related_cands);
 465     }
 466 }
 467
 468 /* Dumps information about the uses to FILE.  */
 469
 470 extern void dump_uses (FILE *, struct ivopts_data *);
 471 void
 472 dump_uses (FILE *file, struct ivopts_data *data)
 473 {
 474   unsigned i;
 475   struct iv_use *use;
 476
 477   for (i = 0; i < n_iv_uses (data); i++)
 478     {
 479       use = iv_use (data, i);
 480
 481       dump_use (file, use);
 482       fprintf (file, "\n");
 483     }
 484 }
 485
 486 /* Dumps information about induction variable candidate CAND to FILE.  */
 487
 488 extern void dump_cand (FILE *, struct iv_cand *);
 489 void
 490 dump_cand (FILE *file, struct iv_cand *cand)
 491 {
 492   struct iv *iv = cand->iv;
 493
 494   fprintf (file, "candidate %d%s\n",
 495            cand->id, cand->important ? " (important)" : "");
 496
 497   if (cand->depends_on)
 498     {
 499       fprintf (file, "  depends on ");
 500       dump_bitmap (file, cand->depends_on);
 501     }
 502
 503   if (!iv)
 504     {
 505       fprintf (file, "  final value replacement\n");
 506       return;
 507     }
 508
 509   switch (cand->pos)
 510     {
 511     case IP_NORMAL:
 512       fprintf (file, "  incremented before exit test\n");
 513       break;
 514
 515     case IP_END:
 516       fprintf (file, "  incremented at end\n");
 517       break;
 518
 519     case IP_ORIGINAL:
 520       fprintf (file, "  original biv\n");
 521       break;
 522     }
 523
 524   dump_iv (file, iv);
 525 }
 526
 527 /* Returns the info for ssa version VER.  */
 528
 529 static inline struct version_info *
 530 ver_info (struct ivopts_data *data, unsigned ver)
 531 {
 532   return data->version_info + ver;
 533 }
 534
 535 /* Returns the info for ssa name NAME.  */
 536
 537 static inline struct version_info *
 538 name_info (struct ivopts_data *data, tree name)
 539 {
 540   return ver_info (data, SSA_NAME_VERSION (name));
 541 }
 542
 543 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 544    emitted in LOOP.  */
 545
 546 static bool
 547 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 548 {
 549   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 550
 551   gcc_assert (bb);
 552
 553   if (sbb == loop->latch)
 554     return true;
 555
 556   if (sbb != bb)
 557     return false;
 558
 559   return stmt == last_stmt (bb);
 560 }
 561
 562 /* Returns true if STMT if after the place where the original induction
 563    variable CAND is incremented.  */
 564
 565 static bool
 566 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 567 {
 568   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 569   basic_block stmt_bb = bb_for_stmt (stmt);
 570   block_stmt_iterator bsi;
 571
 572   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 573     return false;
 574
 575   if (stmt_bb != cand_bb)
 576     return true;
 577
 578   /* Scan the block from the end, since the original ivs are usually
 579      incremented at the end of the loop body.  */
 580   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 581     {
 582       if (bsi_stmt (bsi) == cand->incremented_at)
 583         return false;
 584       if (bsi_stmt (bsi) == stmt)
 585         return true;
 586     }
 587 }
 588
 589 /* Returns true if STMT if after the place where the induction variable
 590    CAND is incremented in LOOP.  */
 591
 592 static bool
 593 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 594 {
 595   switch (cand->pos)
 596     {
 597     case IP_END:
 598       return false;
 599
 600     case IP_NORMAL:
 601       return stmt_after_ip_normal_pos (loop, stmt);
 602
 603     case IP_ORIGINAL:
 604       return stmt_after_ip_original_pos (cand, stmt);
 605
 606     default:
 607       gcc_unreachable ();
 608     }
 609 }
 610
 611 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 612
 613 static bool
 614 abnormal_ssa_name_p (tree exp)
 615 {
 616   if (!exp)
 617     return false;
 618
 619   if (TREE_CODE (exp) != SSA_NAME)
 620     return false;
 621
 622   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 623 }
 624
 625 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 626    abnormal phi node.  Callback for for_each_index.  */
 627
 628 static bool
 629 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 630                                   void *data ATTRIBUTE_UNUSED)
 631 {
 632   if (TREE_CODE (base) == ARRAY_REF)
 633     {
 634       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 635         return false;
 636       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 637         return false;
 638     }
 639
 640   return !abnormal_ssa_name_p (*index);
 641 }
 642
 643 /* Returns true if EXPR contains a ssa name that occurs in an
 644    abnormal phi node.  */
 645
 646 bool
 647 contains_abnormal_ssa_name_p (tree expr)
 648 {
 649   enum tree_code code;
 650   enum tree_code_class codeclass;
 651
 652   if (!expr)
 653     return false;
 654
 655   code = TREE_CODE (expr);
 656   codeclass = TREE_CODE_CLASS (code);
 657
 658   if (code == SSA_NAME)
 659     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 660
 661   if (code == INTEGER_CST
 662       || is_gimple_min_invariant (expr))
 663     return false;
 664
 665   if (code == ADDR_EXPR)
 666     return !for_each_index (&TREE_OPERAND (expr, 0),
 667                             idx_contains_abnormal_ssa_name_p,
 668                             NULL);
 669
 670   switch (codeclass)
 671     {
 672     case tcc_binary:
 673     case tcc_comparison:
 674       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 675         return true;
 676
 677       /* Fallthru.  */
 678     case tcc_unary:
 679       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 680         return true;
 681
 682       break;
 683
 684     default:
 685       gcc_unreachable ();
 686     }
 687
 688   return false;
 689 }
 690
 691 /*  Returns tree describing number of iterations determined from
 692     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 693
 694 static tree
 695 niter_for_exit (struct ivopts_data *data, edge exit)
 696 {
 697   struct tree_niter_desc desc;
 698   tree niter;
 699   void **slot;
 700
 701   if (!data->niters)
 702     {
 703       data->niters = pointer_map_create ();
 704       slot = NULL;
 705     }
 706   else
 707     slot = pointer_map_contains (data->niters, exit);
 708
 709   if (!slot)
 710     {
 711       /* Try to determine number of iterations.  We must know it
 712          unconditionally (i.e., without possibility of # of iterations
 713          being zero).  Also, we cannot safely work with ssa names that
 714          appear in phi nodes on abnormal edges, so that we do not create
 715          overlapping life ranges for them (PR 27283).  */
 716       if (number_of_iterations_exit (data->current_loop,
 717                                      exit, &desc, true)
 718           && integer_zerop (desc.may_be_zero)
 719           && !contains_abnormal_ssa_name_p (desc.niter))
 720         niter = desc.niter;
 721       else
 722         niter = NULL_TREE;
 723
 724       *pointer_map_insert (data->niters, exit) = niter;
 725     }
 726   else
 727     niter = (tree) *slot;
 728
 729   return niter;
 730 }
 731
 732 /* Returns tree describing number of iterations determined from
 733    single dominating exit of DATA->current_loop, or NULL if something
 734    goes wrong.  */
 735
 736 static tree
 737 niter_for_single_dom_exit (struct ivopts_data *data)
 738 {
 739   edge exit = single_dom_exit (data->current_loop);
 740
 741   if (!exit)
 742     return NULL;
 743
 744   return niter_for_exit (data, exit);
 745 }
 746
 747 /* Initializes data structures used by the iv optimization pass, stored
 748    in DATA.  */
 749
 750 static void
 751 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 752 {
 753   data->version_info_size = 2 * num_ssa_names;
 754   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 755   data->relevant = BITMAP_ALLOC (NULL);
 756   data->important_candidates = BITMAP_ALLOC (NULL);
 757   data->max_inv_id = 0;
 758   data->niters = NULL;
 759   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 760   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 761   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 762 }
 763
 764 /* Returns a memory object to that EXPR points.  In case we are able to
 765    determine that it does not point to any such object, NULL is returned.  */
 766
 767 static tree
 768 determine_base_object (tree expr)
 769 {
 770   enum tree_code code = TREE_CODE (expr);
 771   tree base, obj;
 772
 773   /* If this is a pointer casted to any type, we need to determine
 774      the base object for the pointer; so handle conversions before
 775      throwing away non-pointer expressions.  */
 776   if (CONVERT_EXPR_P (expr))
 777     return determine_base_object (TREE_OPERAND (expr, 0));
 778
 779   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 780     return NULL_TREE;
 781
 782   switch (code)
 783     {
 784     case INTEGER_CST:
 785       return NULL_TREE;
 786
 787     case ADDR_EXPR:
 788       obj = TREE_OPERAND (expr, 0);
 789       base = get_base_address (obj);
 790
 791       if (!base)
 792         return expr;
 793
 794       if (TREE_CODE (base) == INDIRECT_REF)
 795         return determine_base_object (TREE_OPERAND (base, 0));
 796
 797       return fold_convert (ptr_type_node,
 798                            build_fold_addr_expr (base));
 799
 800     case POINTER_PLUS_EXPR:
 801       return determine_base_object (TREE_OPERAND (expr, 0));
 802
 803     case PLUS_EXPR:
 804     case MINUS_EXPR:
 805       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 806       gcc_unreachable ();
 807
 808     default:
 809       return fold_convert (ptr_type_node, expr);
 810     }
 811 }
 812
 813 /* Allocates an induction variable with given initial value BASE and step STEP
 814    for loop LOOP.  */
 815
 816 static struct iv *
 817 alloc_iv (tree base, tree step)
 818 {
 819   struct iv *iv = XCNEW (struct iv);
 820   gcc_assert (step != NULL_TREE);
 821
 822   iv->base = base;
 823   iv->base_object = determine_base_object (base);
 824   iv->step = step;
 825   iv->biv_p = false;
 826   iv->have_use_for = false;
 827   iv->use_id = 0;
 828   iv->ssa_name = NULL_TREE;
 829
 830   return iv;
 831 }
 832
 833 /* Sets STEP and BASE for induction variable IV.  */
 834
 835 static void
 836 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 837 {
 838   struct version_info *info = name_info (data, iv);
 839
 840   gcc_assert (!info->iv);
 841
 842   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 843   info->iv = alloc_iv (base, step);
 844   info->iv->ssa_name = iv;
 845 }
 846
 847 /* Finds induction variable declaration for VAR.  */
 848
 849 static struct iv *
 850 get_iv (struct ivopts_data *data, tree var)
 851 {
 852   basic_block bb;
 853   tree type = TREE_TYPE (var);
 854
 855   if (!POINTER_TYPE_P (type)
 856       && !INTEGRAL_TYPE_P (type))
 857     return NULL;
 858
 859   if (!name_info (data, var)->iv)
 860     {
 861       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 862
 863       if (!bb
 864           || !flow_bb_inside_loop_p (data->current_loop, bb))
 865         set_iv (data, var, var, build_int_cst (type, 0));
 866     }
 867
 868   return name_info (data, var)->iv;
 869 }
 870
 871 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 872    not define a simple affine biv with nonzero step.  */
 873
 874 static tree
 875 determine_biv_step (tree phi)
 876 {
 877   struct loop *loop = bb_for_stmt (phi)->loop_father;
 878   tree name = PHI_RESULT (phi);
 879   affine_iv iv;
 880
 881   if (!is_gimple_reg (name))
 882     return NULL_TREE;
 883
 884   if (!simple_iv (loop, phi, name, &iv, true))
 885     return NULL_TREE;
 886
 887   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 888 }
 889
 890 /* Finds basic ivs.  */
 891
 892 static bool
 893 find_bivs (struct ivopts_data *data)
 894 {
 895   tree phi, step, type, base;
 896   bool found = false;
 897   struct loop *loop = data->current_loop;
 898
 899   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 900     {
 901       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 902         continue;
 903
 904       step = determine_biv_step (phi);
 905       if (!step)
 906         continue;
 907
 908       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 909       base = expand_simple_operations (base);
 910       if (contains_abnormal_ssa_name_p (base)
 911           || contains_abnormal_ssa_name_p (step))
 912         continue;
 913
 914       type = TREE_TYPE (PHI_RESULT (phi));
 915       base = fold_convert (type, base);
 916       if (step)
 917         {
 918           if (POINTER_TYPE_P (type))
 919             step = fold_convert (sizetype, step);
 920           else
 921             step = fold_convert (type, step);
 922         }
 923
 924       set_iv (data, PHI_RESULT (phi), base, step);
 925       found = true;
 926     }
 927
 928   return found;
 929 }
 930
 931 /* Marks basic ivs.  */
 932
 933 static void
 934 mark_bivs (struct ivopts_data *data)
 935 {
 936   tree phi, var;
 937   struct iv *iv, *incr_iv;
 938   struct loop *loop = data->current_loop;
 939   basic_block incr_bb;
 940
 941   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 942     {
 943       iv = get_iv (data, PHI_RESULT (phi));
 944       if (!iv)
 945         continue;
 946
 947       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
 948       incr_iv = get_iv (data, var);
 949       if (!incr_iv)
 950         continue;
 951
 952       /* If the increment is in the subloop, ignore it.  */
 953       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 954       if (incr_bb->loop_father != data->current_loop
 955           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
 956         continue;
 957
 958       iv->biv_p = true;
 959       incr_iv->biv_p = true;
 960     }
 961 }
 962
 963 /* Checks whether STMT defines a linear induction variable and stores its
 964    parameters to IV.  */
 965
 966 static bool
 967 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
 968 {
 969   tree lhs;
 970   struct loop *loop = data->current_loop;
 971
 972   iv->base = NULL_TREE;
 973   iv->step = NULL_TREE;
 974
 975   if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
 976     return false;
 977
 978   lhs = GIMPLE_STMT_OPERAND (stmt, 0);
 979   if (TREE_CODE (lhs) != SSA_NAME)
 980     return false;
 981
 982   if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
 983     return false;
 984   iv->base = expand_simple_operations (iv->base);
 985
 986   if (contains_abnormal_ssa_name_p (iv->base)
 987       || contains_abnormal_ssa_name_p (iv->step))
 988     return false;
 989
 990   return true;
 991 }
 992
 993 /* Finds general ivs in statement STMT.  */
 994
 995 static void
 996 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
 997 {
 998   affine_iv iv;
 999
1000   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1001     return;
1002
1003   set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
1004 }
1005
1006 /* Finds general ivs in basic block BB.  */
1007
1008 static void
1009 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1010 {
1011   block_stmt_iterator bsi;
1012
1013   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1014     find_givs_in_stmt (data, bsi_stmt (bsi));
1015 }
1016
1017 /* Finds general ivs.  */
1018
1019 static void
1020 find_givs (struct ivopts_data *data)
1021 {
1022   struct loop *loop = data->current_loop;
1023   basic_block *body = get_loop_body_in_dom_order (loop);
1024   unsigned i;
1025
1026   for (i = 0; i < loop->num_nodes; i++)
1027     find_givs_in_bb (data, body[i]);
1028   free (body);
1029 }
1030
1031 /* For each ssa name defined in LOOP determines whether it is an induction
1032    variable and if so, its initial value and step.  */
1033
1034 static bool
1035 find_induction_variables (struct ivopts_data *data)
1036 {
1037   unsigned i;
1038   bitmap_iterator bi;
1039
1040   if (!find_bivs (data))
1041     return false;
1042
1043   find_givs (data);
1044   mark_bivs (data);
1045
1046   if (dump_file && (dump_flags & TDF_DETAILS))
1047     {
1048       tree niter = niter_for_single_dom_exit (data);
1049
1050       if (niter)
1051         {
1052           fprintf (dump_file, "  number of iterations ");
1053           print_generic_expr (dump_file, niter, TDF_SLIM);
1054           fprintf (dump_file, "\n\n");
1055         };
1056
1057       fprintf (dump_file, "Induction variables:\n\n");
1058
1059       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1060         {
1061           if (ver_info (data, i)->iv)
1062             dump_iv (dump_file, ver_info (data, i)->iv);
1063         }
1064     }
1065
1066   return true;
1067 }
1068
1069 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1070
1071 static struct iv_use *
1072 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1073             tree stmt, enum use_type use_type)
1074 {
1075   struct iv_use *use = XCNEW (struct iv_use);
1076
1077   use->id = n_iv_uses (data);
1078   use->type = use_type;
1079   use->iv = iv;
1080   use->stmt = stmt;
1081   use->op_p = use_p;
1082   use->related_cands = BITMAP_ALLOC (NULL);
1083
1084   /* To avoid showing ssa name in the dumps, if it was not reset by the
1085      caller.  */
1086   iv->ssa_name = NULL_TREE;
1087
1088   if (dump_file && (dump_flags & TDF_DETAILS))
1089     dump_use (dump_file, use);
1090
1091   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1092
1093   return use;
1094 }
1095
1096 /* Checks whether OP is a loop-level invariant and if so, records it.
1097    NONLINEAR_USE is true if the invariant is used in a way we do not
1098    handle specially.  */
1099
1100 static void
1101 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1102 {
1103   basic_block bb;
1104   struct version_info *info;
1105
1106   if (TREE_CODE (op) != SSA_NAME
1107       || !is_gimple_reg (op))
1108     return;
1109
1110   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1111   if (bb
1112       && flow_bb_inside_loop_p (data->current_loop, bb))
1113     return;
1114
1115   info = name_info (data, op);
1116   info->name = op;
1117   info->has_nonlin_use |= nonlinear_use;
1118   if (!info->inv_id)
1119     info->inv_id = ++data->max_inv_id;
1120   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1121 }
1122
1123 /* Checks whether the use OP is interesting and if so, records it.  */
1124
1125 static struct iv_use *
1126 find_interesting_uses_op (struct ivopts_data *data, tree op)
1127 {
1128   struct iv *iv;
1129   struct iv *civ;
1130   tree stmt;
1131   struct iv_use *use;
1132
1133   if (TREE_CODE (op) != SSA_NAME)
1134     return NULL;
1135
1136   iv = get_iv (data, op);
1137   if (!iv)
1138     return NULL;
1139
1140   if (iv->have_use_for)
1141     {
1142       use = iv_use (data, iv->use_id);
1143
1144       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1145       return use;
1146     }
1147
1148   if (integer_zerop (iv->step))
1149     {
1150       record_invariant (data, op, true);
1151       return NULL;
1152     }
1153   iv->have_use_for = true;
1154
1155   civ = XNEW (struct iv);
1156   *civ = *iv;
1157
1158   stmt = SSA_NAME_DEF_STMT (op);
1159   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1160               || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1161
1162   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1163   iv->use_id = use->id;
1164
1165   return use;
1166 }
1167
1168 /* Given a condition *COND_P, checks whether it is a compare of an induction
1169    variable and an invariant.  If this is the case, CONTROL_VAR is set
1170    to location of the iv, BOUND to the location of the invariant,
1171    IV_VAR and IV_BOUND are set to the corresponding induction variable
1172    descriptions, and true is returned.  If this is not the case,
1173    CONTROL_VAR and BOUND are set to the arguments of the condition and
1174    false is returned.  */
1175
1176 static bool
1177 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1178                        tree **control_var, tree **bound,
1179                        struct iv **iv_var, struct iv **iv_bound)
1180 {
1181   /* The nodes returned when COND has just one operand.  Note that you should
1182      not modify anything in BOUND or IV_BOUND because of this.  */
1183   static struct iv const_iv;
1184   static tree zero;
1185   tree cond = *cond_p;
1186   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1187   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1188   bool ret = false;
1189
1190   zero = integer_zero_node;
1191   const_iv.step = integer_zero_node;
1192
1193   if (TREE_CODE (cond) == SSA_NAME)
1194     {
1195       op0 = cond_p;
1196       iv0 = get_iv (data, cond);
1197       ret = (iv0 && !integer_zerop (iv0->step));
1198       goto end;
1199     }
1200
1201   if (!COMPARISON_CLASS_P (cond))
1202     {
1203       op0 = cond_p;
1204       goto end;
1205     }
1206
1207   op0 = &TREE_OPERAND (cond, 0);
1208   op1 = &TREE_OPERAND (cond, 1);
1209   if (TREE_CODE (*op0) == SSA_NAME)
1210     iv0 = get_iv (data, *op0);
1211   if (TREE_CODE (*op1) == SSA_NAME)
1212     iv1 = get_iv (data, *op1);
1213
1214   /* Exactly one of the compared values must be an iv, and the other one must
1215      be an invariant.  */
1216   if (!iv0 || !iv1)
1217     goto end;
1218
1219   if (integer_zerop (iv0->step))
1220     {
1221       /* Control variable may be on the other side.  */
1222       tmp_op = op0; op0 = op1; op1 = tmp_op;
1223       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1224     }
1225   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1226
1227 end:
1228   if (control_var)
1229     *control_var = op0;;
1230   if (iv_var)
1231     *iv_var = iv0;;
1232   if (bound)
1233     *bound = op1;
1234   if (iv_bound)
1235     *iv_bound = iv1;
1236
1237   return ret;
1238 }
1239
1240 /* Checks whether the condition *COND_P in STMT is interesting
1241    and if so, records it.  */
1242
1243 static void
1244 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1245 {
1246   tree *var_p, *bound_p;
1247   struct iv *var_iv, *civ;
1248
1249   if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1250     {
1251       find_interesting_uses_op (data, *var_p);
1252       find_interesting_uses_op (data, *bound_p);
1253       return;
1254     }
1255
1256   civ = XNEW (struct iv);
1257   *civ = *var_iv;
1258   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1259 }
1260
1261 /* Returns true if expression EXPR is obviously invariant in LOOP,
1262    i.e. if all its operands are defined outside of the LOOP.  LOOP
1263    should not be the function body.  */
1264
1265 bool
1266 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1267 {
1268   basic_block def_bb;
1269   unsigned i, len;
1270
1271   gcc_assert (loop_depth (loop) > 0);
1272
1273   if (is_gimple_min_invariant (expr))
1274     return true;
1275
1276   if (TREE_CODE (expr) == SSA_NAME)
1277     {
1278       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1279       if (def_bb
1280           && flow_bb_inside_loop_p (loop, def_bb))
1281         return false;
1282
1283       return true;
1284     }
1285
1286   if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1287     return false;
1288
1289   len = TREE_OPERAND_LENGTH (expr);
1290   for (i = 0; i < len; i++)
1291     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1292       return false;
1293
1294   return true;
1295 }
1296
1297 /* Cumulates the steps of indices into DATA and replaces their values with the
1298    initial ones.  Returns false when the value of the index cannot be determined.
1299    Callback for for_each_index.  */
1300
1301 struct ifs_ivopts_data
1302 {
1303   struct ivopts_data *ivopts_data;
1304   tree stmt;
1305   tree step;
1306 };
1307
1308 static bool
1309 idx_find_step (tree base, tree *idx, void *data)
1310 {
1311   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1312   struct iv *iv;
1313   tree step, iv_base, iv_step, lbound, off;
1314   struct loop *loop = dta->ivopts_data->current_loop;
1315
1316   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1317       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1318     return false;
1319
1320   /* If base is a component ref, require that the offset of the reference
1321      be invariant.  */
1322   if (TREE_CODE (base) == COMPONENT_REF)
1323     {
1324       off = component_ref_field_offset (base);
1325       return expr_invariant_in_loop_p (loop, off);
1326     }
1327
1328   /* If base is array, first check whether we will be able to move the
1329      reference out of the loop (in order to take its address in strength
1330      reduction).  In order for this to work we need both lower bound
1331      and step to be loop invariants.  */
1332   if (TREE_CODE (base) == ARRAY_REF)
1333     {
1334       step = array_ref_element_size (base);
1335       lbound = array_ref_low_bound (base);
1336
1337       if (!expr_invariant_in_loop_p (loop, step)
1338           || !expr_invariant_in_loop_p (loop, lbound))
1339         return false;
1340     }
1341
1342   if (TREE_CODE (*idx) != SSA_NAME)
1343     return true;
1344
1345   iv = get_iv (dta->ivopts_data, *idx);
1346   if (!iv)
1347     return false;
1348
1349   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1350           *&x[0], which is not folded and does not trigger the
1351           ARRAY_REF path below.  */
1352   *idx = iv->base;
1353
1354   if (integer_zerop (iv->step))
1355     return true;
1356
1357   if (TREE_CODE (base) == ARRAY_REF)
1358     {
1359       step = array_ref_element_size (base);
1360
1361       /* We only handle addresses whose step is an integer constant.  */
1362       if (TREE_CODE (step) != INTEGER_CST)
1363         return false;
1364     }
1365   else
1366     /* The step for pointer arithmetics already is 1 byte.  */
1367     step = build_int_cst (sizetype, 1);
1368
1369   iv_base = iv->base;
1370   iv_step = iv->step;
1371   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1372                             sizetype, &iv_base, &iv_step, dta->stmt,
1373                             false))
1374     {
1375       /* The index might wrap.  */
1376       return false;
1377     }
1378
1379   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1380   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1381
1382   return true;
1383 }
1384
1385 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1386    object is passed to it in DATA.  */
1387
1388 static bool
1389 idx_record_use (tree base, tree *idx,
1390                 void *vdata)
1391 {
1392   struct ivopts_data *data = (struct ivopts_data *) vdata;
1393   find_interesting_uses_op (data, *idx);
1394   if (TREE_CODE (base) == ARRAY_REF)
1395     {
1396       find_interesting_uses_op (data, array_ref_element_size (base));
1397       find_interesting_uses_op (data, array_ref_low_bound (base));
1398     }
1399   return true;
1400 }
1401
1402 /* If we can prove that TOP = cst * BOT for some constant cst,
1403    store cst to MUL and return true.  Otherwise return false.
1404    The returned value is always sign-extended, regardless of the
1405    signedness of TOP and BOT.  */
1406
1407 static bool
1408 constant_multiple_of (tree top, tree bot, double_int *mul)
1409 {
1410   tree mby;
1411   enum tree_code code;
1412   double_int res, p0, p1;
1413   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1414
1415   STRIP_NOPS (top);
1416   STRIP_NOPS (bot);
1417
1418   if (operand_equal_p (top, bot, 0))
1419     {
1420       *mul = double_int_one;
1421       return true;
1422     }
1423
1424   code = TREE_CODE (top);
1425   switch (code)
1426     {
1427     case MULT_EXPR:
1428       mby = TREE_OPERAND (top, 1);
1429       if (TREE_CODE (mby) != INTEGER_CST)
1430         return false;
1431
1432       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1433         return false;
1434
1435       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1436                               precision);
1437       return true;
1438
1439     case PLUS_EXPR:
1440     case MINUS_EXPR:
1441       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1442           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1443         return false;
1444
1445       if (code == MINUS_EXPR)
1446         p1 = double_int_neg (p1);
1447       *mul = double_int_sext (double_int_add (p0, p1), precision);
1448       return true;
1449
1450     case INTEGER_CST:
1451       if (TREE_CODE (bot) != INTEGER_CST)
1452         return false;
1453
1454       p0 = double_int_sext (tree_to_double_int (top), precision);
1455       p1 = double_int_sext (tree_to_double_int (bot), precision);
1456       if (double_int_zero_p (p1))
1457         return false;
1458       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1459                               precision);
1460       return double_int_zero_p (res);
1461
1462     default:
1463       return false;
1464     }
1465 }
1466
1467 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1468
1469 static bool
1470 may_be_unaligned_p (tree ref, tree step)
1471 {
1472   tree base;
1473   tree base_type;
1474   HOST_WIDE_INT bitsize;
1475   HOST_WIDE_INT bitpos;
1476   tree toffset;
1477   enum machine_mode mode;
1478   int unsignedp, volatilep;
1479   unsigned base_align;
1480
1481   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1482      thus they are not misaligned.  */
1483   if (TREE_CODE (ref) == TARGET_MEM_REF)
1484     return false;
1485
1486   /* The test below is basically copy of what expr.c:normal_inner_ref
1487      does to check whether the object must be loaded by parts when
1488      STRICT_ALIGNMENT is true.  */
1489   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1490                               &unsignedp, &volatilep, true);
1491   base_type = TREE_TYPE (base);
1492   base_align = TYPE_ALIGN (base_type);
1493
1494   if (mode != BLKmode)
1495     {
1496       double_int mul;
1497       tree al = build_int_cst (TREE_TYPE (step),
1498                                GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
1499
1500       if (base_align < GET_MODE_ALIGNMENT (mode)
1501           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1502           || bitpos % BITS_PER_UNIT != 0)
1503         return true;
1504
1505       if (!constant_multiple_of (step, al, &mul))
1506         return true;
1507     }
1508
1509   return false;
1510 }
1511
1512 /* Return true if EXPR may be non-addressable.   */
1513
1514 static bool
1515 may_be_nonaddressable_p (tree expr)
1516 {
1517   switch (TREE_CODE (expr))
1518     {
1519     case TARGET_MEM_REF:
1520       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1521          target, thus they are always addressable.  */
1522       return false;
1523
1524     case COMPONENT_REF:
1525       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1526              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1527
1528     case VIEW_CONVERT_EXPR:
1529       /* This kind of view-conversions may wrap non-addressable objects
1530          and make them look addressable.  After some processing the
1531          non-addressability may be uncovered again, causing ADDR_EXPRs
1532          of inappropriate objects to be built.  */
1533       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1534           || is_gimple_min_invariant (TREE_OPERAND (expr, 0)))
1535         return true;
1536
1537       /* ... fall through ... */
1538
1539     case ARRAY_REF:
1540     case ARRAY_RANGE_REF:
1541       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1542
1543     CASE_CONVERT:
1544       return true;
1545
1546     default:
1547       break;
1548     }
1549
1550   return false;
1551 }
1552
1553 /* Finds addresses in *OP_P inside STMT.  */
1554
1555 static void
1556 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1557 {
1558   tree base = *op_p, step = build_int_cst (sizetype, 0);
1559   struct iv *civ;
1560   struct ifs_ivopts_data ifs_ivopts_data;
1561
1562   /* Do not play with volatile memory references.  A bit too conservative,
1563      perhaps, but safe.  */
1564   if (stmt_ann (stmt)->has_volatile_ops)
1565     goto fail;
1566
1567   /* Ignore bitfields for now.  Not really something terribly complicated
1568      to handle.  TODO.  */
1569   if (TREE_CODE (base) == BIT_FIELD_REF)
1570     goto fail;
1571
1572   base = unshare_expr (base);
1573
1574   if (TREE_CODE (base) == TARGET_MEM_REF)
1575     {
1576       tree type = build_pointer_type (TREE_TYPE (base));
1577       tree astep;
1578
1579       if (TMR_BASE (base)
1580           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1581         {
1582           civ = get_iv (data, TMR_BASE (base));
1583           if (!civ)
1584             goto fail;
1585
1586           TMR_BASE (base) = civ->base;
1587           step = civ->step;
1588         }
1589       if (TMR_INDEX (base)
1590           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1591         {
1592           civ = get_iv (data, TMR_INDEX (base));
1593           if (!civ)
1594             goto fail;
1595
1596           TMR_INDEX (base) = civ->base;
1597           astep = civ->step;
1598
1599           if (astep)
1600             {
1601               if (TMR_STEP (base))
1602                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1603
1604               step = fold_build2 (PLUS_EXPR, type, step, astep);
1605             }
1606         }
1607
1608       if (integer_zerop (step))
1609         goto fail;
1610       base = tree_mem_ref_addr (type, base);
1611     }
1612   else
1613     {
1614       ifs_ivopts_data.ivopts_data = data;
1615       ifs_ivopts_data.stmt = stmt;
1616       ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1617       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1618           || integer_zerop (ifs_ivopts_data.step))
1619         goto fail;
1620       step = ifs_ivopts_data.step;
1621
1622       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1623       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1624
1625       /* Check that the base expression is addressable.  This needs
1626          to be done after substituting bases of IVs into it.  */
1627       if (may_be_nonaddressable_p (base))
1628         goto fail;
1629
1630       /* Moreover, on strict alignment platforms, check that it is
1631          sufficiently aligned.  */
1632       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1633         goto fail;
1634
1635       base = build_fold_addr_expr (base);
1636
1637       /* Substituting bases of IVs into the base expression might
1638          have caused folding opportunities.  */
1639       if (TREE_CODE (base) == ADDR_EXPR)
1640         {
1641           tree *ref = &TREE_OPERAND (base, 0);
1642           while (handled_component_p (*ref))
1643             ref = &TREE_OPERAND (*ref, 0);
1644           if (TREE_CODE (*ref) == INDIRECT_REF)
1645             *ref = fold_indirect_ref (*ref);
1646         }
1647     }
1648
1649   civ = alloc_iv (base, step);
1650   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1651   return;
1652
1653 fail:
1654   for_each_index (op_p, idx_record_use, data);
1655 }
1656
1657 /* Finds and records invariants used in STMT.  */
1658
1659 static void
1660 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1661 {
1662   ssa_op_iter iter;
1663   use_operand_p use_p;
1664   tree op;
1665
1666   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1667     {
1668       op = USE_FROM_PTR (use_p);
1669       record_invariant (data, op, false);
1670     }
1671 }
1672
1673 /* Finds interesting uses of induction variables in the statement STMT.  */
1674
1675 static void
1676 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1677 {
1678   struct iv *iv;
1679   tree op, lhs, rhs;
1680   ssa_op_iter iter;
1681   use_operand_p use_p;
1682
1683   find_invariants_stmt (data, stmt);
1684
1685   if (TREE_CODE (stmt) == COND_EXPR)
1686     {
1687       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1688       return;
1689     }
1690
1691   if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1692     {
1693       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1694       rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1695
1696       if (TREE_CODE (lhs) == SSA_NAME)
1697         {
1698           /* If the statement defines an induction variable, the uses are not
1699              interesting by themselves.  */
1700
1701           iv = get_iv (data, lhs);
1702
1703           if (iv && !integer_zerop (iv->step))
1704             return;
1705         }
1706
1707       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1708         {
1709         case tcc_comparison:
1710           find_interesting_uses_cond (data, stmt,
1711                                       &GIMPLE_STMT_OPERAND (stmt, 1));
1712           return;
1713
1714         case tcc_reference:
1715           find_interesting_uses_address (data, stmt,
1716                                          &GIMPLE_STMT_OPERAND (stmt, 1));
1717           if (REFERENCE_CLASS_P (lhs))
1718             find_interesting_uses_address (data, stmt,
1719                                            &GIMPLE_STMT_OPERAND (stmt, 0));
1720           return;
1721
1722         default: ;
1723         }
1724
1725       if (REFERENCE_CLASS_P (lhs)
1726           && is_gimple_val (rhs))
1727         {
1728           find_interesting_uses_address (data, stmt,
1729                                          &GIMPLE_STMT_OPERAND (stmt, 0));
1730           find_interesting_uses_op (data, rhs);
1731           return;
1732         }
1733
1734       /* TODO -- we should also handle address uses of type
1735
1736          memory = call (whatever);
1737
1738          and
1739
1740          call (memory).  */
1741     }
1742
1743   if (TREE_CODE (stmt) == PHI_NODE
1744       && bb_for_stmt (stmt) == data->current_loop->header)
1745     {
1746       lhs = PHI_RESULT (stmt);
1747       iv = get_iv (data, lhs);
1748
1749       if (iv && !integer_zerop (iv->step))
1750         return;
1751     }
1752
1753   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1754     {
1755       op = USE_FROM_PTR (use_p);
1756
1757       if (TREE_CODE (op) != SSA_NAME)
1758         continue;
1759
1760       iv = get_iv (data, op);
1761       if (!iv)
1762         continue;
1763
1764       find_interesting_uses_op (data, op);
1765     }
1766 }
1767
1768 /* Finds interesting uses of induction variables outside of loops
1769    on loop exit edge EXIT.  */
1770
1771 static void
1772 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1773 {
1774   tree phi, def;
1775
1776   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1777     {
1778       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1779       if (is_gimple_reg (def))
1780         find_interesting_uses_op (data, def);
1781     }
1782 }
1783
1784 /* Finds uses of the induction variables that are interesting.  */
1785
1786 static void
1787 find_interesting_uses (struct ivopts_data *data)
1788 {
1789   basic_block bb;
1790   block_stmt_iterator bsi;
1791   tree phi;
1792   basic_block *body = get_loop_body (data->current_loop);
1793   unsigned i;
1794   struct version_info *info;
1795   edge e;
1796
1797   if (dump_file && (dump_flags & TDF_DETAILS))
1798     fprintf (dump_file, "Uses:\n\n");
1799
1800   for (i = 0; i < data->current_loop->num_nodes; i++)
1801     {
1802       edge_iterator ei;
1803       bb = body[i];
1804
1805       FOR_EACH_EDGE (e, ei, bb->succs)
1806         if (e->dest != EXIT_BLOCK_PTR
1807             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1808           find_interesting_uses_outside (data, e);
1809
1810       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1811         find_interesting_uses_stmt (data, phi);
1812       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1813         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1814     }
1815
1816   if (dump_file && (dump_flags & TDF_DETAILS))
1817     {
1818       bitmap_iterator bi;
1819
1820       fprintf (dump_file, "\n");
1821
1822       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1823         {
1824           info = ver_info (data, i);
1825           if (info->inv_id)
1826             {
1827               fprintf (dump_file, "  ");
1828               print_generic_expr (dump_file, info->name, TDF_SLIM);
1829               fprintf (dump_file, " is invariant (%d)%s\n",
1830                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1831             }
1832         }
1833
1834       fprintf (dump_file, "\n");
1835     }
1836
1837   free (body);
1838 }
1839
1840 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1841    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1842    we are at the top-level of the processed address.  */
1843
1844 static tree
1845 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1846                 unsigned HOST_WIDE_INT *offset)
1847 {
1848   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1849   enum tree_code code;
1850   tree type, orig_type = TREE_TYPE (expr);
1851   unsigned HOST_WIDE_INT off0, off1, st;
1852   tree orig_expr = expr;
1853
1854   STRIP_NOPS (expr);
1855
1856   type = TREE_TYPE (expr);
1857   code = TREE_CODE (expr);
1858   *offset = 0;
1859
1860   switch (code)
1861     {
1862     case INTEGER_CST:
1863       if (!cst_and_fits_in_hwi (expr)
1864           || integer_zerop (expr))
1865         return orig_expr;
1866
1867       *offset = int_cst_value (expr);
1868       return build_int_cst (orig_type, 0);
1869
1870     case POINTER_PLUS_EXPR:
1871     case PLUS_EXPR:
1872     case MINUS_EXPR:
1873       op0 = TREE_OPERAND (expr, 0);
1874       op1 = TREE_OPERAND (expr, 1);
1875
1876       op0 = strip_offset_1 (op0, false, false, &off0);
1877       op1 = strip_offset_1 (op1, false, false, &off1);
1878
1879       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1880       if (op0 == TREE_OPERAND (expr, 0)
1881           && op1 == TREE_OPERAND (expr, 1))
1882         return orig_expr;
1883
1884       if (integer_zerop (op1))
1885         expr = op0;
1886       else if (integer_zerop (op0))
1887         {
1888           if (code == MINUS_EXPR)
1889             expr = fold_build1 (NEGATE_EXPR, type, op1);
1890           else
1891             expr = op1;
1892         }
1893       else
1894         expr = fold_build2 (code, type, op0, op1);
1895
1896       return fold_convert (orig_type, expr);
1897
1898     case ARRAY_REF:
1899       if (!inside_addr)
1900         return orig_expr;
1901
1902       step = array_ref_element_size (expr);
1903       if (!cst_and_fits_in_hwi (step))
1904         break;
1905
1906       st = int_cst_value (step);
1907       op1 = TREE_OPERAND (expr, 1);
1908       op1 = strip_offset_1 (op1, false, false, &off1);
1909       *offset = off1 * st;
1910
1911       if (top_compref
1912           && integer_zerop (op1))
1913         {
1914           /* Strip the component reference completely.  */
1915           op0 = TREE_OPERAND (expr, 0);
1916           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1917           *offset += off0;
1918           return op0;
1919         }
1920       break;
1921
1922     case COMPONENT_REF:
1923       if (!inside_addr)
1924         return orig_expr;
1925
1926       tmp = component_ref_field_offset (expr);
1927       if (top_compref
1928           && cst_and_fits_in_hwi (tmp))
1929         {
1930           /* Strip the component reference completely.  */
1931           op0 = TREE_OPERAND (expr, 0);
1932           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1933           *offset = off0 + int_cst_value (tmp);
1934           return op0;
1935         }
1936       break;
1937
1938     case ADDR_EXPR:
1939       op0 = TREE_OPERAND (expr, 0);
1940       op0 = strip_offset_1 (op0, true, true, &off0);
1941       *offset += off0;
1942
1943       if (op0 == TREE_OPERAND (expr, 0))
1944         return orig_expr;
1945
1946       expr = build_fold_addr_expr (op0);
1947       return fold_convert (orig_type, expr);
1948
1949     case INDIRECT_REF:
1950       inside_addr = false;
1951       break;
1952
1953     default:
1954       return orig_expr;
1955     }
1956
1957   /* Default handling of expressions for that we want to recurse into
1958      the first operand.  */
1959   op0 = TREE_OPERAND (expr, 0);
1960   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1961   *offset += off0;
1962
1963   if (op0 == TREE_OPERAND (expr, 0)
1964       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1965     return orig_expr;
1966
1967   expr = copy_node (expr);
1968   TREE_OPERAND (expr, 0) = op0;
1969   if (op1)
1970     TREE_OPERAND (expr, 1) = op1;
1971
1972   /* Inside address, we might strip the top level component references,
1973      thus changing type of the expression.  Handling of ADDR_EXPR
1974      will fix that.  */
1975   expr = fold_convert (orig_type, expr);
1976
1977   return expr;
1978 }
1979
1980 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1981
1982 static tree
1983 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1984 {
1985   return strip_offset_1 (expr, false, false, offset);
1986 }
1987
1988 /* Returns variant of TYPE that can be used as base for different uses.
1989    We return unsigned type with the same precision, which avoids problems
1990    with overflows.  */
1991
1992 static tree
1993 generic_type_for (tree type)
1994 {
1995   if (POINTER_TYPE_P (type))
1996     return unsigned_type_for (type);
1997
1998   if (TYPE_UNSIGNED (type))
1999     return type;
2000
2001   return unsigned_type_for (type);
2002 }
2003
2004 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2005    the bitmap to that we should store it.  */
2006
2007 static struct ivopts_data *fd_ivopts_data;
2008 static tree
2009 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2010 {
2011   bitmap *depends_on = (bitmap *) data;
2012   struct version_info *info;
2013
2014   if (TREE_CODE (*expr_p) != SSA_NAME)
2015     return NULL_TREE;
2016   info = name_info (fd_ivopts_data, *expr_p);
2017
2018   if (!info->inv_id || info->has_nonlin_use)
2019     return NULL_TREE;
2020
2021   if (!*depends_on)
2022     *depends_on = BITMAP_ALLOC (NULL);
2023   bitmap_set_bit (*depends_on, info->inv_id);
2024
2025   return NULL_TREE;
2026 }
2027
2028 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2029    position to POS.  If USE is not NULL, the candidate is set as related to
2030    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2031    replacement of the final value of the iv by a direct computation.  */
2032
2033 static struct iv_cand *
2034 add_candidate_1 (struct ivopts_data *data,
2035                  tree base, tree step, bool important, enum iv_position pos,
2036                  struct iv_use *use, tree incremented_at)
2037 {
2038   unsigned i;
2039   struct iv_cand *cand = NULL;
2040   tree type, orig_type;
2041
2042   if (base)
2043     {
2044       orig_type = TREE_TYPE (base);
2045       type = generic_type_for (orig_type);
2046       /* Don't convert the base to the generic type for pointers as the generic
2047          type is an integer type with the same size as the pointer type.  */
2048       if (type != orig_type && !POINTER_TYPE_P (orig_type))
2049         {
2050           base = fold_convert (type, base);
2051           step = fold_convert (type, step);
2052         }
2053     }
2054
2055   for (i = 0; i < n_iv_cands (data); i++)
2056     {
2057       cand = iv_cand (data, i);
2058
2059       if (cand->pos != pos)
2060         continue;
2061
2062       if (cand->incremented_at != incremented_at)
2063         continue;
2064
2065       if (!cand->iv)
2066         {
2067           if (!base && !step)
2068             break;
2069
2070           continue;
2071         }
2072
2073       if (!base && !step)
2074         continue;
2075
2076       if (operand_equal_p (base, cand->iv->base, 0)
2077           && operand_equal_p (step, cand->iv->step, 0))
2078         break;
2079     }
2080
2081   if (i == n_iv_cands (data))
2082     {
2083       cand = XCNEW (struct iv_cand);
2084       cand->id = i;
2085
2086       if (!base && !step)
2087         cand->iv = NULL;
2088       else
2089         cand->iv = alloc_iv (base, step);
2090
2091       cand->pos = pos;
2092       if (pos != IP_ORIGINAL && cand->iv)
2093         {
2094           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2095           cand->var_after = cand->var_before;
2096         }
2097       cand->important = important;
2098       cand->incremented_at = incremented_at;
2099       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2100
2101       if (step
2102           && TREE_CODE (step) != INTEGER_CST)
2103         {
2104           fd_ivopts_data = data;
2105           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2106         }
2107
2108       if (dump_file && (dump_flags & TDF_DETAILS))
2109         dump_cand (dump_file, cand);
2110     }
2111
2112   if (important && !cand->important)
2113     {
2114       cand->important = true;
2115       if (dump_file && (dump_flags & TDF_DETAILS))
2116         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2117     }
2118
2119   if (use)
2120     {
2121       bitmap_set_bit (use->related_cands, i);
2122       if (dump_file && (dump_flags & TDF_DETAILS))
2123         fprintf (dump_file, "Candidate %d is related to use %d\n",
2124                  cand->id, use->id);
2125     }
2126
2127   return cand;
2128 }
2129
2130 /* Returns true if incrementing the induction variable at the end of the LOOP
2131    is allowed.
2132
2133    The purpose is to avoid splitting latch edge with a biv increment, thus
2134    creating a jump, possibly confusing other optimization passes and leaving
2135    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2136    is not available (so we do not have a better alternative), or if the latch
2137    edge is already nonempty.  */
2138
2139 static bool
2140 allow_ip_end_pos_p (struct loop *loop)
2141 {
2142   if (!ip_normal_pos (loop))
2143     return true;
2144
2145   if (!empty_block_p (ip_end_pos (loop)))
2146     return true;
2147
2148   return false;
2149 }
2150
2151 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2152    position to POS.  If USE is not NULL, the candidate is set as related to
2153    it.  The candidate computation is scheduled on all available positions.  */
2154
2155 static void
2156 add_candidate (struct ivopts_data *data,
2157                tree base, tree step, bool important, struct iv_use *use)
2158 {
2159   if (ip_normal_pos (data->current_loop))
2160     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2161   if (ip_end_pos (data->current_loop)
2162       && allow_ip_end_pos_p (data->current_loop))
2163     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2164 }
2165
2166 /* Add a standard "0 + 1 * iteration" iv candidate for a
2167    type with SIZE bits.  */
2168
2169 static void
2170 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2171                                      unsigned int size)
2172 {
2173   tree type = lang_hooks.types.type_for_size (size, true);
2174   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2175                  true, NULL);
2176 }
2177
2178 /* Adds standard iv candidates.  */
2179
2180 static void
2181 add_standard_iv_candidates (struct ivopts_data *data)
2182 {
2183   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2184
2185   /* The same for a double-integer type if it is still fast enough.  */
2186   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2187     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2188 }
2189
2190
2191 /* Adds candidates bases on the old induction variable IV.  */
2192
2193 static void
2194 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2195 {
2196   tree phi, def;
2197   struct iv_cand *cand;
2198
2199   add_candidate (data, iv->base, iv->step, true, NULL);
2200
2201   /* The same, but with initial value zero.  */
2202   add_candidate (data,
2203                  build_int_cst (TREE_TYPE (iv->base), 0),
2204                  iv->step, true, NULL);
2205
2206   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2207   if (TREE_CODE (phi) == PHI_NODE)
2208     {
2209       /* Additionally record the possibility of leaving the original iv
2210          untouched.  */
2211       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2212       cand = add_candidate_1 (data,
2213                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2214                               SSA_NAME_DEF_STMT (def));
2215       cand->var_before = iv->ssa_name;
2216       cand->var_after = def;
2217     }
2218 }
2219
2220 /* Adds candidates based on the old induction variables.  */
2221
2222 static void
2223 add_old_ivs_candidates (struct ivopts_data *data)
2224 {
2225   unsigned i;
2226   struct iv *iv;
2227   bitmap_iterator bi;
2228
2229   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2230     {
2231       iv = ver_info (data, i)->iv;
2232       if (iv && iv->biv_p && !integer_zerop (iv->step))
2233         add_old_iv_candidates (data, iv);
2234     }
2235 }
2236
2237 /* Adds candidates based on the value of the induction variable IV and USE.  */
2238
2239 static void
2240 add_iv_value_candidates (struct ivopts_data *data,
2241                          struct iv *iv, struct iv_use *use)
2242 {
2243   unsigned HOST_WIDE_INT offset;
2244   tree base;
2245   tree basetype;
2246
2247   add_candidate (data, iv->base, iv->step, false, use);
2248
2249   /* The same, but with initial value zero.  Make such variable important,
2250      since it is generic enough so that possibly many uses may be based
2251      on it.  */
2252   basetype = TREE_TYPE (iv->base);
2253   if (POINTER_TYPE_P (basetype))
2254     basetype = sizetype;
2255   add_candidate (data, build_int_cst (basetype, 0),
2256                  iv->step, true, use);
2257
2258   /* Third, try removing the constant offset.  */
2259   base = strip_offset (iv->base, &offset);
2260   if (offset)
2261     add_candidate (data, base, iv->step, false, use);
2262 }
2263
2264 /* Adds candidates based on the uses.  */
2265
2266 static void
2267 add_derived_ivs_candidates (struct ivopts_data *data)
2268 {
2269   unsigned i;
2270
2271   for (i = 0; i < n_iv_uses (data); i++)
2272     {
2273       struct iv_use *use = iv_use (data, i);
2274
2275       if (!use)
2276         continue;
2277
2278       switch (use->type)
2279         {
2280         case USE_NONLINEAR_EXPR:
2281         case USE_COMPARE:
2282         case USE_ADDRESS:
2283           /* Just add the ivs based on the value of the iv used here.  */
2284           add_iv_value_candidates (data, use->iv, use);
2285           break;
2286
2287         default:
2288           gcc_unreachable ();
2289         }
2290     }
2291 }
2292
2293 /* Record important candidates and add them to related_cands bitmaps
2294    if needed.  */
2295
2296 static void
2297 record_important_candidates (struct ivopts_data *data)
2298 {
2299   unsigned i;
2300   struct iv_use *use;
2301
2302   for (i = 0; i < n_iv_cands (data); i++)
2303     {
2304       struct iv_cand *cand = iv_cand (data, i);
2305
2306       if (cand->important)
2307         bitmap_set_bit (data->important_candidates, i);
2308     }
2309
2310   data->consider_all_candidates = (n_iv_cands (data)
2311                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2312
2313   if (data->consider_all_candidates)
2314     {
2315       /* We will not need "related_cands" bitmaps in this case,
2316          so release them to decrease peak memory consumption.  */
2317       for (i = 0; i < n_iv_uses (data); i++)
2318         {
2319           use = iv_use (data, i);
2320           BITMAP_FREE (use->related_cands);
2321         }
2322     }
2323   else
2324     {
2325       /* Add important candidates to the related_cands bitmaps.  */
2326       for (i = 0; i < n_iv_uses (data); i++)
2327         bitmap_ior_into (iv_use (data, i)->related_cands,
2328                          data->important_candidates);
2329     }
2330 }
2331
2332 /* Finds the candidates for the induction variables.  */
2333
2334 static void
2335 find_iv_candidates (struct ivopts_data *data)
2336 {
2337   /* Add commonly used ivs.  */
2338   add_standard_iv_candidates (data);
2339
2340   /* Add old induction variables.  */
2341   add_old_ivs_candidates (data);
2342
2343   /* Add induction variables derived from uses.  */
2344   add_derived_ivs_candidates (data);
2345
2346   /* Record the important candidates.  */
2347   record_important_candidates (data);
2348 }
2349
2350 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2351    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2352    we allocate a simple list to every use.  */
2353
2354 static void
2355 alloc_use_cost_map (struct ivopts_data *data)
2356 {
2357   unsigned i, size, s, j;
2358
2359   for (i = 0; i < n_iv_uses (data); i++)
2360     {
2361       struct iv_use *use = iv_use (data, i);
2362       bitmap_iterator bi;
2363
2364       if (data->consider_all_candidates)
2365         size = n_iv_cands (data);
2366       else
2367         {
2368           s = 0;
2369           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2370             {
2371               s++;
2372             }
2373
2374           /* Round up to the power of two, so that moduling by it is fast.  */
2375           for (size = 1; size < s; size <<= 1)
2376             continue;
2377         }
2378
2379       use->n_map_members = size;
2380       use->cost_map = XCNEWVEC (struct cost_pair, size);
2381     }
2382 }
2383
2384 /* Returns description of computation cost of expression whose runtime
2385    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2386
2387 static comp_cost
2388 new_cost (unsigned runtime, unsigned complexity)
2389 {
2390   comp_cost cost;
2391
2392   cost.cost = runtime;
2393   cost.complexity = complexity;
2394
2395   return cost;
2396 }
2397
2398 /* Adds costs COST1 and COST2.  */
2399
2400 static comp_cost
2401 add_costs (comp_cost cost1, comp_cost cost2)
2402 {
2403   cost1.cost += cost2.cost;
2404   cost1.complexity += cost2.complexity;
2405
2406   return cost1;
2407 }
2408 /* Subtracts costs COST1 and COST2.  */
2409
2410 static comp_cost
2411 sub_costs (comp_cost cost1, comp_cost cost2)
2412 {
2413   cost1.cost -= cost2.cost;
2414   cost1.complexity -= cost2.complexity;
2415
2416   return cost1;
2417 }
2418
2419 /* Returns a negative number if COST1 < COST2, a positive number if
2420    COST1 > COST2, and 0 if COST1 = COST2.  */
2421
2422 static int
2423 compare_costs (comp_cost cost1, comp_cost cost2)
2424 {
2425   if (cost1.cost == cost2.cost)
2426     return cost1.complexity - cost2.complexity;
2427
2428   return cost1.cost - cost2.cost;
2429 }
2430
2431 /* Returns true if COST is infinite.  */
2432
2433 static bool
2434 infinite_cost_p (comp_cost cost)
2435 {
2436   return cost.cost == INFTY;
2437 }
2438
2439 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2440    on invariants DEPENDS_ON and that the value used in expressing it
2441    is VALUE.*/
2442
2443 static void
2444 set_use_iv_cost (struct ivopts_data *data,
2445                  struct iv_use *use, struct iv_cand *cand,
2446                  comp_cost cost, bitmap depends_on, tree value)
2447 {
2448   unsigned i, s;
2449
2450   if (infinite_cost_p (cost))
2451     {
2452       BITMAP_FREE (depends_on);
2453       return;
2454     }
2455
2456   if (data->consider_all_candidates)
2457     {
2458       use->cost_map[cand->id].cand = cand;
2459       use->cost_map[cand->id].cost = cost;
2460       use->cost_map[cand->id].depends_on = depends_on;
2461       use->cost_map[cand->id].value = value;
2462       return;
2463     }
2464
2465   /* n_map_members is a power of two, so this computes modulo.  */
2466   s = cand->id & (use->n_map_members - 1);
2467   for (i = s; i < use->n_map_members; i++)
2468     if (!use->cost_map[i].cand)
2469       goto found;
2470   for (i = 0; i < s; i++)
2471     if (!use->cost_map[i].cand)
2472       goto found;
2473
2474   gcc_unreachable ();
2475
2476 found:
2477   use->cost_map[i].cand = cand;
2478   use->cost_map[i].cost = cost;
2479   use->cost_map[i].depends_on = depends_on;
2480   use->cost_map[i].value = value;
2481 }
2482
2483 /* Gets cost of (USE, CANDIDATE) pair.  */
2484
2485 static struct cost_pair *
2486 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2487                  struct iv_cand *cand)
2488 {
2489   unsigned i, s;
2490   struct cost_pair *ret;
2491
2492   if (!cand)
2493     return NULL;
2494
2495   if (data->consider_all_candidates)
2496     {
2497       ret = use->cost_map + cand->id;
2498       if (!ret->cand)
2499         return NULL;
2500
2501       return ret;
2502     }
2503
2504   /* n_map_members is a power of two, so this computes modulo.  */
2505   s = cand->id & (use->n_map_members - 1);
2506   for (i = s; i < use->n_map_members; i++)
2507     if (use->cost_map[i].cand == cand)
2508       return use->cost_map + i;
2509
2510   for (i = 0; i < s; i++)
2511     if (use->cost_map[i].cand == cand)
2512       return use->cost_map + i;
2513
2514   return NULL;
2515 }
2516
2517 /* Returns estimate on cost of computing SEQ.  */
2518
2519 static unsigned
2520 seq_cost (rtx seq)
2521 {
2522   unsigned cost = 0;
2523   rtx set;
2524
2525   for (; seq; seq = NEXT_INSN (seq))
2526     {
2527       set = single_set (seq);
2528       if (set)
2529         cost += rtx_cost (set, SET);
2530       else
2531         cost++;
2532     }
2533
2534   return cost;
2535 }
2536
2537 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2538 static rtx
2539 produce_memory_decl_rtl (tree obj, int *regno)
2540 {
2541   rtx x;
2542
2543   gcc_assert (obj);
2544   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2545     {
2546       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2547       x = gen_rtx_SYMBOL_REF (Pmode, name);
2548       SET_SYMBOL_REF_DECL (x, obj);
2549       x = gen_rtx_MEM (DECL_MODE (obj), x);
2550       targetm.encode_section_info (obj, x, true);
2551     }
2552   else
2553     {
2554       x = gen_raw_REG (Pmode, (*regno)++);
2555       x = gen_rtx_MEM (DECL_MODE (obj), x);
2556     }
2557
2558   return x;
2559 }
2560
2561 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2562    walk_tree.  DATA contains the actual fake register number.  */
2563
2564 static tree
2565 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2566 {
2567   tree obj = NULL_TREE;
2568   rtx x = NULL_RTX;
2569   int *regno = (int *) data;
2570
2571   switch (TREE_CODE (*expr_p))
2572     {
2573     case ADDR_EXPR:
2574       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2575            handled_component_p (*expr_p);
2576            expr_p = &TREE_OPERAND (*expr_p, 0))
2577         continue;
2578       obj = *expr_p;
2579       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2580         x = produce_memory_decl_rtl (obj, regno);
2581       break;
2582
2583     case SSA_NAME:
2584       *ws = 0;
2585       obj = SSA_NAME_VAR (*expr_p);
2586       if (!DECL_RTL_SET_P (obj))
2587         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2588       break;
2589
2590     case VAR_DECL:
2591     case PARM_DECL:
2592     case RESULT_DECL:
2593       *ws = 0;
2594       obj = *expr_p;
2595
2596       if (DECL_RTL_SET_P (obj))
2597         break;
2598
2599       if (DECL_MODE (obj) == BLKmode)
2600         x = produce_memory_decl_rtl (obj, regno);
2601       else
2602         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2603
2604       break;
2605
2606     default:
2607       break;
2608     }
2609
2610   if (x)
2611     {
2612       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2613       SET_DECL_RTL (obj, x);
2614     }
2615
2616   return NULL_TREE;
2617 }
2618
2619 /* Determines cost of the computation of EXPR.  */
2620
2621 static unsigned
2622 computation_cost (tree expr)
2623 {
2624   rtx seq, rslt;
2625   tree type = TREE_TYPE (expr);
2626   unsigned cost;
2627   /* Avoid using hard regs in ways which may be unsupported.  */
2628   int regno = LAST_VIRTUAL_REGISTER + 1;
2629
2630   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2631   start_sequence ();
2632   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2633   seq = get_insns ();
2634   end_sequence ();
2635
2636   cost = seq_cost (seq);
2637   if (MEM_P (rslt))
2638     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2639
2640   return cost;
2641 }
2642
2643 /* Returns variable containing the value of candidate CAND at statement AT.  */
2644
2645 static tree
2646 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2647 {
2648   if (stmt_after_increment (loop, cand, stmt))
2649     return cand->var_after;
2650   else
2651     return cand->var_before;
2652 }
2653
2654 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2655    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2656
2657 int
2658 tree_int_cst_sign_bit (const_tree t)
2659 {
2660   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2661   unsigned HOST_WIDE_INT w;
2662
2663   if (bitno < HOST_BITS_PER_WIDE_INT)
2664     w = TREE_INT_CST_LOW (t);
2665   else
2666     {
2667       w = TREE_INT_CST_HIGH (t);
2668       bitno -= HOST_BITS_PER_WIDE_INT;
2669     }
2670
2671   return (w >> bitno) & 1;
2672 }
2673
2674 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2675    same precision that is at least as wide as the precision of TYPE, stores
2676    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2677    type of A and B.  */
2678
2679 static tree
2680 determine_common_wider_type (tree *a, tree *b)
2681 {
2682   tree wider_type = NULL;
2683   tree suba, subb;
2684   tree atype = TREE_TYPE (*a);
2685
2686   if (CONVERT_EXPR_P (*a))
2687     {
2688       suba = TREE_OPERAND (*a, 0);
2689       wider_type = TREE_TYPE (suba);
2690       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2691         return atype;
2692     }
2693   else
2694     return atype;
2695
2696   if (CONVERT_EXPR_P (*b))
2697     {
2698       subb = TREE_OPERAND (*b, 0);
2699       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2700         return atype;
2701     }
2702   else
2703     return atype;
2704
2705   *a = suba;
2706   *b = subb;
2707   return wider_type;
2708 }
2709
2710 /* Determines the expression by that USE is expressed from induction variable
2711    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2712    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2713
2714 static bool
2715 get_computation_aff (struct loop *loop,
2716                      struct iv_use *use, struct iv_cand *cand, tree at,
2717                      struct affine_tree_combination *aff)
2718 {
2719   tree ubase = use->iv->base;
2720   tree ustep = use->iv->step;
2721   tree cbase = cand->iv->base;
2722   tree cstep = cand->iv->step, cstep_common;
2723   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2724   tree common_type, var;
2725   tree uutype;
2726   aff_tree cbase_aff, var_aff;
2727   double_int rat;
2728
2729   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2730     {
2731       /* We do not have a precision to express the values of use.  */
2732       return false;
2733     }
2734
2735   var = var_at_stmt (loop, cand, at);
2736   uutype = unsigned_type_for (utype);
2737
2738   /* If the conversion is not noop, perform it.  */
2739   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2740     {
2741       cstep = fold_convert (uutype, cstep);
2742       cbase = fold_convert (uutype, cbase);
2743       var = fold_convert (uutype, var);
2744     }
2745
2746   if (!constant_multiple_of (ustep, cstep, &rat))
2747     return false;
2748
2749   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2750      type, we achieve better folding by computing their difference in this
2751      wider type, and cast the result to UUTYPE.  We do not need to worry about
2752      overflows, as all the arithmetics will in the end be performed in UUTYPE
2753      anyway.  */
2754   common_type = determine_common_wider_type (&ubase, &cbase);
2755
2756   /* use = ubase - ratio * cbase + ratio * var.  */
2757   tree_to_aff_combination (ubase, common_type, aff);
2758   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2759   tree_to_aff_combination (var, uutype, &var_aff);
2760
2761   /* We need to shift the value if we are after the increment.  */
2762   if (stmt_after_increment (loop, cand, at))
2763     {
2764       aff_tree cstep_aff;
2765
2766       if (common_type != uutype)
2767         cstep_common = fold_convert (common_type, cstep);
2768       else
2769         cstep_common = cstep;
2770
2771       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2772       aff_combination_add (&cbase_aff, &cstep_aff);
2773     }
2774
2775   aff_combination_scale (&cbase_aff, double_int_neg (rat));
2776   aff_combination_add (aff, &cbase_aff);
2777   if (common_type != uutype)
2778     aff_combination_convert (aff, uutype);
2779
2780   aff_combination_scale (&var_aff, rat);
2781   aff_combination_add (aff, &var_aff);
2782
2783   return true;
2784 }
2785
2786 /* Determines the expression by that USE is expressed from induction variable
2787    CAND at statement AT in LOOP.  The computation is unshared.  */
2788
2789 static tree
2790 get_computation_at (struct loop *loop,
2791                     struct iv_use *use, struct iv_cand *cand, tree at)
2792 {
2793   aff_tree aff;
2794   tree type = TREE_TYPE (use->iv->base);
2795
2796   if (!get_computation_aff (loop, use, cand, at, &aff))
2797     return NULL_TREE;
2798   unshare_aff_combination (&aff);
2799   return fold_convert (type, aff_combination_to_tree (&aff));
2800 }
2801
2802 /* Determines the expression by that USE is expressed from induction variable
2803    CAND in LOOP.  The computation is unshared.  */
2804
2805 static tree
2806 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2807 {
2808   return get_computation_at (loop, use, cand, use->stmt);
2809 }
2810
2811 /* Returns cost of addition in MODE.  */
2812
2813 static unsigned
2814 add_cost (enum machine_mode mode)
2815 {
2816   static unsigned costs[NUM_MACHINE_MODES];
2817   rtx seq;
2818   unsigned cost;
2819
2820   if (costs[mode])
2821     return costs[mode];
2822
2823   start_sequence ();
2824   force_operand (gen_rtx_fmt_ee (PLUS, mode,
2825                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2826                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2827                  NULL_RTX);
2828   seq = get_insns ();
2829   end_sequence ();
2830
2831   cost = seq_cost (seq);
2832   if (!cost)
2833     cost = 1;
2834
2835   costs[mode] = cost;
2836
2837   if (dump_file && (dump_flags & TDF_DETAILS))
2838     fprintf (dump_file, "Addition in %s costs %d\n",
2839              GET_MODE_NAME (mode), cost);
2840   return cost;
2841 }
2842
2843 /* Entry in a hashtable of already known costs for multiplication.  */
2844 struct mbc_entry
2845 {
2846   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
2847   enum machine_mode mode;       /* In mode.  */
2848   unsigned cost;                /* The cost.  */
2849 };
2850
2851 /* Counts hash value for the ENTRY.  */
2852
2853 static hashval_t
2854 mbc_entry_hash (const void *entry)
2855 {
2856   const struct mbc_entry *e = (const struct mbc_entry *) entry;
2857
2858   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2859 }
2860
2861 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2862
2863 static int
2864 mbc_entry_eq (const void *entry1, const void *entry2)
2865 {
2866   const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2867   const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2868
2869   return (e1->mode == e2->mode
2870           && e1->cst == e2->cst);
2871 }
2872
2873 /* Returns cost of multiplication by constant CST in MODE.  */
2874
2875 unsigned
2876 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2877 {
2878   static htab_t costs;
2879   struct mbc_entry **cached, act;
2880   rtx seq;
2881   unsigned cost;
2882
2883   if (!costs)
2884     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2885
2886   act.mode = mode;
2887   act.cst = cst;
2888   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2889   if (*cached)
2890     return (*cached)->cost;
2891
2892   *cached = XNEW (struct mbc_entry);
2893   (*cached)->mode = mode;
2894   (*cached)->cst = cst;
2895
2896   start_sequence ();
2897   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2898                gen_int_mode (cst, mode), NULL_RTX, 0);
2899   seq = get_insns ();
2900   end_sequence ();
2901
2902   cost = seq_cost (seq);
2903
2904   if (dump_file && (dump_flags & TDF_DETAILS))
2905     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2906              (int) cst, GET_MODE_NAME (mode), cost);
2907
2908   (*cached)->cost = cost;
2909
2910   return cost;
2911 }
2912
2913 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
2914    validity for a memory reference accessing memory of mode MODE.  */
2915
2916 bool
2917 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2918 {
2919 #define MAX_RATIO 128
2920   static sbitmap valid_mult[MAX_MACHINE_MODE];
2921
2922   if (!valid_mult[mode])
2923     {
2924       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2925       rtx addr;
2926       HOST_WIDE_INT i;
2927
2928       valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2929       sbitmap_zero (valid_mult[mode]);
2930       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2931       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2932         {
2933           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2934           if (memory_address_p (mode, addr))
2935             SET_BIT (valid_mult[mode], i + MAX_RATIO);
2936         }
2937
2938       if (dump_file && (dump_flags & TDF_DETAILS))
2939         {
2940           fprintf (dump_file, "  allowed multipliers:");
2941           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2942             if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2943               fprintf (dump_file, " %d", (int) i);
2944           fprintf (dump_file, "\n");
2945           fprintf (dump_file, "\n");
2946         }
2947     }
2948
2949   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2950     return false;
2951
2952   return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2953 }
2954
2955 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2956    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
2957    variable is omitted.  Compute the cost for a memory reference that accesses
2958    a memory location of mode MEM_MODE.
2959
2960    TODO -- there must be some better way.  This all is quite crude.  */
2961
2962 static comp_cost
2963 get_address_cost (bool symbol_present, bool var_present,
2964                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2965                   enum machine_mode mem_mode)
2966 {
2967   static bool initialized[MAX_MACHINE_MODE];
2968   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2969   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2970   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2971   unsigned cost, acost, complexity;
2972   bool offset_p, ratio_p;
2973   HOST_WIDE_INT s_offset;
2974   unsigned HOST_WIDE_INT mask;
2975   unsigned bits;
2976
2977   if (!initialized[mem_mode])
2978     {
2979       HOST_WIDE_INT i;
2980       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2981       int old_cse_not_expected;
2982       unsigned sym_p, var_p, off_p, rat_p, add_c;
2983       rtx seq, addr, base;
2984       rtx reg0, reg1;
2985
2986       initialized[mem_mode] = true;
2987
2988       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2989
2990       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2991       for (i = start; i <= 1 << 20; i <<= 1)
2992         {
2993           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2994           if (!memory_address_p (mem_mode, addr))
2995             break;
2996         }
2997       max_offset[mem_mode] = i == start ? 0 : i >> 1;
2998       off[mem_mode] = max_offset[mem_mode];
2999
3000       for (i = start; i <= 1 << 20; i <<= 1)
3001         {
3002           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3003           if (!memory_address_p (mem_mode, addr))
3004             break;
3005         }
3006       min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3007
3008       if (dump_file && (dump_flags & TDF_DETAILS))
3009         {
3010           fprintf (dump_file, "get_address_cost:\n");
3011           fprintf (dump_file, "  min offset %s %d\n",
3012                    GET_MODE_NAME (mem_mode),
3013                    (int) min_offset[mem_mode]);
3014           fprintf (dump_file, "  max offset %s %d\n",
3015                    GET_MODE_NAME (mem_mode),
3016                    (int) max_offset[mem_mode]);
3017         }
3018
3019       rat[mem_mode] = 1;
3020       for (i = 2; i <= MAX_RATIO; i++)
3021         if (multiplier_allowed_in_address_p (i, mem_mode))
3022           {
3023             rat[mem_mode] = i;
3024             break;
3025           }
3026
3027       /* Compute the cost of various addressing modes.  */
3028       acost = 0;
3029       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3030       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3031
3032       for (i = 0; i < 16; i++)
3033         {
3034           sym_p = i & 1;
3035           var_p = (i >> 1) & 1;
3036           off_p = (i >> 2) & 1;
3037           rat_p = (i >> 3) & 1;
3038
3039           addr = reg0;
3040           if (rat_p)
3041             addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3042                                    gen_int_mode (rat[mem_mode], Pmode));
3043
3044           if (var_p)
3045             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3046
3047           if (sym_p)
3048             {
3049               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3050               /* ??? We can run into trouble with some backends by presenting
3051                  it with symbols which haven't been properly passed through
3052                  targetm.encode_section_info.  By setting the local bit, we
3053                  enhance the probability of things working.  */
3054               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3055
3056               if (off_p)
3057                 base = gen_rtx_fmt_e (CONST, Pmode,
3058                                       gen_rtx_fmt_ee (PLUS, Pmode,
3059                                                       base,
3060                                                       gen_int_mode (off[mem_mode],
3061                                                                     Pmode)));
3062             }
3063           else if (off_p)
3064             base = gen_int_mode (off[mem_mode], Pmode);
3065           else
3066             base = NULL_RTX;
3067
3068           if (base)
3069             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3070
3071           start_sequence ();
3072           /* To avoid splitting addressing modes, pretend that no cse will
3073              follow.  */
3074           old_cse_not_expected = cse_not_expected;
3075           cse_not_expected = true;
3076           addr = memory_address (mem_mode, addr);
3077           cse_not_expected = old_cse_not_expected;
3078           seq = get_insns ();
3079           end_sequence ();
3080
3081           acost = seq_cost (seq);
3082           acost += address_cost (addr, mem_mode);
3083
3084           if (!acost)
3085             acost = 1;
3086           costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3087         }
3088
3089       /* On some targets, it is quite expensive to load symbol to a register,
3090          which makes addresses that contain symbols look much more expensive.
3091          However, the symbol will have to be loaded in any case before the
3092          loop (and quite likely we have it in register already), so it does not
3093          make much sense to penalize them too heavily.  So make some final
3094          tweaks for the SYMBOL_PRESENT modes:
3095
3096          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3097          var is cheaper, use this mode with small penalty.
3098          If VAR_PRESENT is true, try whether the mode with
3099          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3100          if this is the case, use it.  */
3101       add_c = add_cost (Pmode);
3102       for (i = 0; i < 8; i++)
3103         {
3104           var_p = i & 1;
3105           off_p = (i >> 1) & 1;
3106           rat_p = (i >> 2) & 1;
3107
3108           acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3109           if (var_p)
3110             acost += add_c;
3111
3112           if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3113             costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3114         }
3115
3116       if (dump_file && (dump_flags & TDF_DETAILS))
3117         {
3118           fprintf (dump_file, "Address costs:\n");
3119
3120           for (i = 0; i < 16; i++)
3121             {
3122               sym_p = i & 1;
3123               var_p = (i >> 1) & 1;
3124               off_p = (i >> 2) & 1;
3125               rat_p = (i >> 3) & 1;
3126
3127               fprintf (dump_file, "  ");
3128               if (sym_p)
3129                 fprintf (dump_file, "sym + ");
3130               if (var_p)
3131                 fprintf (dump_file, "var + ");
3132               if (off_p)
3133                 fprintf (dump_file, "cst + ");
3134               if (rat_p)
3135                 fprintf (dump_file, "rat * ");
3136
3137               acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3138               fprintf (dump_file, "index costs %d\n", acost);
3139             }
3140           fprintf (dump_file, "\n");
3141         }
3142     }
3143
3144   bits = GET_MODE_BITSIZE (Pmode);
3145   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3146   offset &= mask;
3147   if ((offset >> (bits - 1) & 1))
3148     offset |= ~mask;
3149   s_offset = offset;
3150
3151   cost = 0;
3152   offset_p = (s_offset != 0
3153               && min_offset[mem_mode] <= s_offset
3154               && s_offset <= max_offset[mem_mode]);
3155   ratio_p = (ratio != 1
3156              && multiplier_allowed_in_address_p (ratio, mem_mode));
3157
3158   if (ratio != 1 && !ratio_p)
3159     cost += multiply_by_cost (ratio, Pmode);
3160
3161   if (s_offset && !offset_p && !symbol_present)
3162     cost += add_cost (Pmode);
3163
3164   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3165   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3166   return new_cost (cost + acost, complexity);
3167 }
3168
3169 /* Estimates cost of forcing expression EXPR into a variable.  */
3170
3171 static comp_cost
3172 force_expr_to_var_cost (tree expr)
3173 {
3174   static bool costs_initialized = false;
3175   static unsigned integer_cost;
3176   static unsigned symbol_cost;
3177   static unsigned address_cost;
3178   tree op0, op1;
3179   comp_cost cost0, cost1, cost;
3180   enum machine_mode mode;
3181
3182   if (!costs_initialized)
3183     {
3184       tree type = build_pointer_type (integer_type_node);
3185       tree var, addr;
3186       rtx x;
3187
3188       var = create_tmp_var_raw (integer_type_node, "test_var");
3189       TREE_STATIC (var) = 1;
3190       x = produce_memory_decl_rtl (var, NULL);
3191       SET_DECL_RTL (var, x);
3192
3193       integer_cost = computation_cost (build_int_cst (integer_type_node,
3194                                                       2000));
3195
3196       addr = build1 (ADDR_EXPR, type, var);
3197       symbol_cost = computation_cost (addr) + 1;
3198
3199       address_cost
3200         = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3201                                     addr,
3202                                     build_int_cst (sizetype, 2000))) + 1;
3203       if (dump_file && (dump_flags & TDF_DETAILS))
3204         {
3205           fprintf (dump_file, "force_expr_to_var_cost:\n");
3206           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3207           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3208           fprintf (dump_file, "  address %d\n", (int) address_cost);
3209           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3210           fprintf (dump_file, "\n");
3211         }
3212
3213       costs_initialized = true;
3214     }
3215
3216   STRIP_NOPS (expr);
3217
3218   if (SSA_VAR_P (expr))
3219     return zero_cost;
3220
3221   if (is_gimple_min_invariant (expr))
3222     {
3223       if (TREE_CODE (expr) == INTEGER_CST)
3224         return new_cost (integer_cost, 0);
3225
3226       if (TREE_CODE (expr) == ADDR_EXPR)
3227         {
3228           tree obj = TREE_OPERAND (expr, 0);
3229
3230           if (TREE_CODE (obj) == VAR_DECL
3231               || TREE_CODE (obj) == PARM_DECL
3232               || TREE_CODE (obj) == RESULT_DECL)
3233             return new_cost (symbol_cost, 0);
3234         }
3235
3236       return new_cost (address_cost, 0);
3237     }
3238
3239   switch (TREE_CODE (expr))
3240     {
3241     case POINTER_PLUS_EXPR:
3242     case PLUS_EXPR:
3243     case MINUS_EXPR:
3244     case MULT_EXPR:
3245       op0 = TREE_OPERAND (expr, 0);
3246       op1 = TREE_OPERAND (expr, 1);
3247       STRIP_NOPS (op0);
3248       STRIP_NOPS (op1);
3249
3250       if (is_gimple_val (op0))
3251         cost0 = zero_cost;
3252       else
3253         cost0 = force_expr_to_var_cost (op0);
3254
3255       if (is_gimple_val (op1))
3256         cost1 = zero_cost;
3257       else
3258         cost1 = force_expr_to_var_cost (op1);
3259
3260       break;
3261
3262     default:
3263       /* Just an arbitrary value, FIXME.  */
3264       return new_cost (target_spill_cost, 0);
3265     }
3266
3267   mode = TYPE_MODE (TREE_TYPE (expr));
3268   switch (TREE_CODE (expr))
3269     {
3270     case POINTER_PLUS_EXPR:
3271     case PLUS_EXPR:
3272     case MINUS_EXPR:
3273       cost = new_cost (add_cost (mode), 0);
3274       break;
3275
3276     case MULT_EXPR:
3277       if (cst_and_fits_in_hwi (op0))
3278         cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
3279       else if (cst_and_fits_in_hwi (op1))
3280         cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
3281       else
3282         return new_cost (target_spill_cost, 0);
3283       break;
3284
3285     default:
3286       gcc_unreachable ();
3287     }
3288
3289   cost = add_costs (cost, cost0);
3290   cost = add_costs (cost, cost1);
3291
3292   /* Bound the cost by target_spill_cost.  The parts of complicated
3293      computations often are either loop invariant or at least can
3294      be shared between several iv uses, so letting this grow without
3295      limits would not give reasonable results.  */
3296   if (cost.cost > target_spill_cost)
3297     cost.cost = target_spill_cost;
3298
3299   return cost;
3300 }
3301
3302 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3303    invariants the computation depends on.  */
3304
3305 static comp_cost
3306 force_var_cost (struct ivopts_data *data,
3307                 tree expr, bitmap *depends_on)
3308 {
3309   if (depends_on)
3310     {
3311       fd_ivopts_data = data;
3312       walk_tree (&expr, find_depends, depends_on, NULL);
3313     }
3314
3315   return force_expr_to_var_cost (expr);
3316 }
3317
3318 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3319    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3320    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3321    invariants the computation depends on.  */
3322
3323 static comp_cost
3324 split_address_cost (struct ivopts_data *data,
3325                     tree addr, bool *symbol_present, bool *var_present,
3326                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3327 {
3328   tree core;
3329   HOST_WIDE_INT bitsize;
3330   HOST_WIDE_INT bitpos;
3331   tree toffset;
3332   enum machine_mode mode;
3333   int unsignedp, volatilep;
3334
3335   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3336                               &unsignedp, &volatilep, false);
3337
3338   if (toffset != 0
3339       || bitpos % BITS_PER_UNIT != 0
3340       || TREE_CODE (core) != VAR_DECL)
3341     {
3342       *symbol_present = false;
3343       *var_present = true;
3344       fd_ivopts_data = data;
3345       walk_tree (&addr, find_depends, depends_on, NULL);
3346       return new_cost (target_spill_cost, 0);
3347     }
3348
3349   *offset += bitpos / BITS_PER_UNIT;
3350   if (TREE_STATIC (core)
3351       || DECL_EXTERNAL (core))
3352     {
3353       *symbol_present = true;
3354       *var_present = false;
3355       return zero_cost;
3356     }
3357
3358   *symbol_present = false;
3359   *var_present = true;
3360   return zero_cost;
3361 }
3362
3363 /* Estimates cost of expressing difference of addresses E1 - E2 as
3364    var + symbol + offset.  The value of offset is added to OFFSET,
3365    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3366    part is missing.  DEPENDS_ON is a set of the invariants the computation
3367    depends on.  */
3368
3369 static comp_cost
3370 ptr_difference_cost (struct ivopts_data *data,
3371                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3372                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3373 {
3374   HOST_WIDE_INT diff = 0;
3375   comp_cost cost;
3376
3377   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3378
3379   if (ptr_difference_const (e1, e2, &diff))
3380     {
3381       *offset += diff;
3382       *symbol_present = false;
3383       *var_present = false;
3384       return zero_cost;
3385     }
3386
3387   if (integer_zerop (e2))
3388     return split_address_cost (data, TREE_OPERAND (e1, 0),
3389                                symbol_present, var_present, offset, depends_on);
3390
3391   *symbol_present = false;
3392   *var_present = true;
3393
3394   cost = force_var_cost (data, e1, depends_on);
3395   cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3396   cost.cost += add_cost (Pmode);
3397
3398   return cost;
3399 }
3400
3401 /* Estimates cost of expressing difference E1 - E2 as
3402    var + symbol + offset.  The value of offset is added to OFFSET,
3403    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3404    part is missing.  DEPENDS_ON is a set of the invariants the computation
3405    depends on.  */
3406
3407 static comp_cost
3408 difference_cost (struct ivopts_data *data,
3409                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3410                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3411 {
3412   comp_cost cost;
3413   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3414   unsigned HOST_WIDE_INT off1, off2;
3415
3416   e1 = strip_offset (e1, &off1);
3417   e2 = strip_offset (e2, &off2);
3418   *offset += off1 - off2;
3419
3420   STRIP_NOPS (e1);
3421   STRIP_NOPS (e2);
3422
3423   if (TREE_CODE (e1) == ADDR_EXPR)
3424     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3425                                 depends_on);
3426   *symbol_present = false;
3427
3428   if (operand_equal_p (e1, e2, 0))
3429     {
3430       *var_present = false;
3431       return zero_cost;
3432     }
3433   *var_present = true;
3434   if (integer_zerop (e2))
3435     return force_var_cost (data, e1, depends_on);
3436
3437   if (integer_zerop (e1))
3438     {
3439       cost = force_var_cost (data, e2, depends_on);
3440       cost.cost += multiply_by_cost (-1, mode);
3441
3442       return cost;
3443     }
3444
3445   cost = force_var_cost (data, e1, depends_on);
3446   cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3447   cost.cost += add_cost (mode);
3448
3449   return cost;
3450 }
3451
3452 /* Determines the cost of the computation by that USE is expressed
3453    from induction variable CAND.  If ADDRESS_P is true, we just need
3454    to create an address from it, otherwise we want to get it into
3455    register.  A set of invariants we depend on is stored in
3456    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3457
3458 static comp_cost
3459 get_computation_cost_at (struct ivopts_data *data,
3460                          struct iv_use *use, struct iv_cand *cand,
3461                          bool address_p, bitmap *depends_on, tree at)
3462 {
3463   tree ubase = use->iv->base, ustep = use->iv->step;
3464   tree cbase, cstep;
3465   tree utype = TREE_TYPE (ubase), ctype;
3466   unsigned HOST_WIDE_INT cstepi, offset = 0;
3467   HOST_WIDE_INT ratio, aratio;
3468   bool var_present, symbol_present;
3469   comp_cost cost;
3470   unsigned n_sums;
3471   double_int rat;
3472
3473   *depends_on = NULL;
3474
3475   /* Only consider real candidates.  */
3476   if (!cand->iv)
3477     return infinite_cost;
3478
3479   cbase = cand->iv->base;
3480   cstep = cand->iv->step;
3481   ctype = TREE_TYPE (cbase);
3482
3483   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3484     {
3485       /* We do not have a precision to express the values of use.  */
3486       return infinite_cost;
3487     }
3488
3489   if (address_p)
3490     {
3491       /* Do not try to express address of an object with computation based
3492          on address of a different object.  This may cause problems in rtl
3493          level alias analysis (that does not expect this to be happening,
3494          as this is illegal in C), and would be unlikely to be useful
3495          anyway.  */
3496       if (use->iv->base_object
3497           && cand->iv->base_object
3498           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3499         return infinite_cost;
3500     }
3501
3502   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3503     {
3504       /* TODO -- add direct handling of this case.  */
3505       goto fallback;
3506     }
3507
3508   /* CSTEPI is removed from the offset in case statement is after the
3509      increment.  If the step is not constant, we use zero instead.
3510      This is a bit imprecise (there is the extra addition), but
3511      redundancy elimination is likely to transform the code so that
3512      it uses value of the variable before increment anyway,
3513      so it is not that much unrealistic.  */
3514   if (cst_and_fits_in_hwi (cstep))
3515     cstepi = int_cst_value (cstep);
3516   else
3517     cstepi = 0;
3518
3519   if (!constant_multiple_of (ustep, cstep, &rat))
3520     return infinite_cost;
3521
3522   if (double_int_fits_in_shwi_p (rat))
3523     ratio = double_int_to_shwi (rat);
3524   else
3525     return infinite_cost;
3526
3527   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3528      or ratio == 1, it is better to handle this like
3529
3530      ubase - ratio * cbase + ratio * var
3531
3532      (also holds in the case ratio == -1, TODO.  */
3533
3534   if (cst_and_fits_in_hwi (cbase))
3535     {
3536       offset = - ratio * int_cst_value (cbase);
3537       cost = difference_cost (data,
3538                               ubase, build_int_cst (utype, 0),
3539                               &symbol_present, &var_present, &offset,
3540                               depends_on);
3541     }
3542   else if (ratio == 1)
3543     {
3544       cost = difference_cost (data,
3545                               ubase, cbase,
3546                               &symbol_present, &var_present, &offset,
3547                               depends_on);
3548     }
3549   else
3550     {
3551       cost = force_var_cost (data, cbase, depends_on);
3552       cost.cost += add_cost (TYPE_MODE (ctype));
3553       cost = add_costs (cost,
3554                         difference_cost (data,
3555                                          ubase, build_int_cst (utype, 0),
3556                                          &symbol_present, &var_present,
3557                                          &offset, depends_on));
3558     }
3559
3560   /* If we are after the increment, the value of the candidate is higher by
3561      one iteration.  */
3562   if (stmt_after_increment (data->current_loop, cand, at))
3563     offset -= ratio * cstepi;
3564
3565   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3566      (symbol/var/const parts may be omitted).  If we are looking for an address,
3567      find the cost of addressing this.  */
3568   if (address_p)
3569     return add_costs (cost, get_address_cost (symbol_present, var_present,
3570                                 offset, ratio,
3571                                 TYPE_MODE (TREE_TYPE (*use->op_p))));
3572
3573   /* Otherwise estimate the costs for computing the expression.  */
3574   aratio = ratio > 0 ? ratio : -ratio;
3575   if (!symbol_present && !var_present && !offset)
3576     {
3577       if (ratio != 1)
3578         cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3579
3580       return cost;
3581     }
3582
3583   if (aratio != 1)
3584     cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3585
3586   n_sums = 1;
3587   if (var_present
3588       /* Symbol + offset should be compile-time computable.  */
3589       && (symbol_present || offset))
3590     n_sums++;
3591
3592   /* Having offset does not affect runtime cost in case it is added to
3593      symbol, but it increases complexity.  */
3594   if (offset)
3595     cost.complexity++;
3596
3597   cost.cost += n_sums * add_cost (TYPE_MODE (ctype));
3598   return cost;
3599
3600 fallback:
3601   {
3602     /* Just get the expression, expand it and measure the cost.  */
3603     tree comp = get_computation_at (data->current_loop, use, cand, at);
3604
3605     if (!comp)
3606       return infinite_cost;
3607
3608     if (address_p)
3609       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3610
3611     return new_cost (computation_cost (comp), 0);
3612   }
3613 }
3614
3615 /* Determines the cost of the computation by that USE is expressed
3616    from induction variable CAND.  If ADDRESS_P is true, we just need
3617    to create an address from it, otherwise we want to get it into
3618    register.  A set of invariants we depend on is stored in
3619    DEPENDS_ON.  */
3620
3621 static comp_cost
3622 get_computation_cost (struct ivopts_data *data,
3623                       struct iv_use *use, struct iv_cand *cand,
3624                       bool address_p, bitmap *depends_on)
3625 {
3626   return get_computation_cost_at (data,
3627                                   use, cand, address_p, depends_on, use->stmt);
3628 }
3629
3630 /* Determines cost of basing replacement of USE on CAND in a generic
3631    expression.  */
3632
3633 static bool
3634 determine_use_iv_cost_generic (struct ivopts_data *data,
3635                                struct iv_use *use, struct iv_cand *cand)
3636 {
3637   bitmap depends_on;
3638   comp_cost cost;
3639
3640   /* The simple case first -- if we need to express value of the preserved
3641      original biv, the cost is 0.  This also prevents us from counting the
3642      cost of increment twice -- once at this use and once in the cost of
3643      the candidate.  */
3644   if (cand->pos == IP_ORIGINAL
3645       && cand->incremented_at == use->stmt)
3646     {
3647       set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3648       return true;
3649     }
3650
3651   cost = get_computation_cost (data, use, cand, false, &depends_on);
3652   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3653
3654   return !infinite_cost_p (cost);
3655 }
3656
3657 /* Determines cost of basing replacement of USE on CAND in an address.  */
3658
3659 static bool
3660 determine_use_iv_cost_address (struct ivopts_data *data,
3661                                struct iv_use *use, struct iv_cand *cand)
3662 {
3663   bitmap depends_on;
3664   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3665
3666   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3667
3668   return !infinite_cost_p (cost);
3669 }
3670
3671 /* Computes value of candidate CAND at position AT in iteration NITER, and
3672    stores it to VAL.  */
3673
3674 static void
3675 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3676                aff_tree *val)
3677 {
3678   aff_tree step, delta, nit;
3679   struct iv *iv = cand->iv;
3680   tree type = TREE_TYPE (iv->base);
3681   tree steptype = type;
3682   if (POINTER_TYPE_P (type))
3683     steptype = sizetype;
3684
3685   tree_to_aff_combination (iv->step, steptype, &step);
3686   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3687   aff_combination_convert (&nit, steptype);
3688   aff_combination_mult (&nit, &step, &delta);
3689   if (stmt_after_increment (loop, cand, at))
3690     aff_combination_add (&delta, &step);
3691
3692   tree_to_aff_combination (iv->base, type, val);
3693   aff_combination_add (val, &delta);
3694 }
3695
3696 /* Returns period of induction variable iv.  */
3697
3698 static tree
3699 iv_period (struct iv *iv)
3700 {
3701   tree step = iv->step, period, type;
3702   tree pow2div;
3703
3704   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3705
3706   /* Period of the iv is gcd (step, type range).  Since type range is power
3707      of two, it suffices to determine the maximum power of two that divides
3708      step.  */
3709   pow2div = num_ending_zeros (step);
3710   type = unsigned_type_for (TREE_TYPE (step));
3711
3712   period = build_low_bits_mask (type,
3713                                 (TYPE_PRECISION (type)
3714                                  - tree_low_cst (pow2div, 1)));
3715
3716   return period;
3717 }
3718
3719 /* Returns the comparison operator used when eliminating the iv USE.  */
3720
3721 static enum tree_code
3722 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3723 {
3724   struct loop *loop = data->current_loop;
3725   basic_block ex_bb;
3726   edge exit;
3727
3728   ex_bb = bb_for_stmt (use->stmt);
3729   exit = EDGE_SUCC (ex_bb, 0);
3730   if (flow_bb_inside_loop_p (loop, exit->dest))
3731     exit = EDGE_SUCC (ex_bb, 1);
3732
3733   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3734 }
3735
3736 /* Check whether it is possible to express the condition in USE by comparison
3737    of candidate CAND.  If so, store the value compared with to BOUND.  */
3738
3739 static bool
3740 may_eliminate_iv (struct ivopts_data *data,
3741                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3742 {
3743   basic_block ex_bb;
3744   edge exit;
3745   tree nit, period;
3746   struct loop *loop = data->current_loop;
3747   aff_tree bnd;
3748
3749   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3750     return false;
3751
3752   /* For now works only for exits that dominate the loop latch.
3753      TODO: extend to other conditions inside loop body.  */
3754   ex_bb = bb_for_stmt (use->stmt);
3755   if (use->stmt != last_stmt (ex_bb)
3756       || TREE_CODE (use->stmt) != COND_EXPR)
3757     return false;
3758   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3759     return false;
3760
3761   exit = EDGE_SUCC (ex_bb, 0);
3762   if (flow_bb_inside_loop_p (loop, exit->dest))
3763     exit = EDGE_SUCC (ex_bb, 1);
3764   if (flow_bb_inside_loop_p (loop, exit->dest))
3765     return false;
3766
3767   nit = niter_for_exit (data, exit);
3768   if (!nit)
3769     return false;
3770
3771   /* Determine whether we can use the variable to test the exit condition.
3772      This is the case iff the period of the induction variable is greater
3773      than the number of iterations for which the exit condition is true.  */
3774   period = iv_period (cand->iv);
3775
3776   /* If the number of iterations is constant, compare against it directly.  */
3777   if (TREE_CODE (nit) == INTEGER_CST)
3778     {
3779       if (!tree_int_cst_lt (nit, period))
3780         return false;
3781     }
3782
3783   /* If not, and if this is the only possible exit of the loop, see whether
3784      we can get a conservative estimate on the number of iterations of the
3785      entire loop and compare against that instead.  */
3786   else if (loop_only_exit_p (loop, exit))
3787     {
3788       double_int period_value, max_niter;
3789       if (!estimated_loop_iterations (loop, true, &max_niter))
3790         return false;
3791       period_value = tree_to_double_int (period);
3792       if (double_int_ucmp (max_niter, period_value) >= 0)
3793         return false;
3794     }
3795
3796   /* Otherwise, punt.  */
3797   else
3798     return false;
3799
3800   cand_value_at (loop, cand, use->stmt, nit, &bnd);
3801   *bound = aff_combination_to_tree (&bnd);
3802   return true;
3803 }
3804
3805 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3806
3807 static bool
3808 determine_use_iv_cost_condition (struct ivopts_data *data,
3809                                  struct iv_use *use, struct iv_cand *cand)
3810 {
3811   tree bound = NULL_TREE;
3812   struct iv *cmp_iv;
3813   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3814   comp_cost elim_cost, express_cost, cost;
3815   bool ok;
3816
3817   /* Only consider real candidates.  */
3818   if (!cand->iv)
3819     {
3820       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3821       return false;
3822     }
3823
3824   /* Try iv elimination.  */
3825   if (may_eliminate_iv (data, use, cand, &bound))
3826     {
3827       elim_cost = force_var_cost (data, bound, &depends_on_elim);
3828       /* The bound is a loop invariant, so it will be only computed
3829          once.  */
3830       elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3831     }
3832   else
3833     elim_cost = infinite_cost;
3834
3835   /* Try expressing the original giv.  If it is compared with an invariant,
3836      note that we cannot get rid of it.  */
3837   ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3838   gcc_assert (ok);
3839
3840   express_cost = get_computation_cost (data, use, cand, false,
3841                                        &depends_on_express);
3842   fd_ivopts_data = data;
3843   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3844
3845   /* Choose the better approach.  */
3846   if (compare_costs (elim_cost, express_cost) < 0)
3847     {
3848       cost = elim_cost;
3849       depends_on = depends_on_elim;
3850       depends_on_elim = NULL;
3851     }
3852   else
3853     {
3854       cost = express_cost;
3855       depends_on = depends_on_express;
3856       depends_on_express = NULL;
3857       bound = NULL_TREE;
3858     }
3859
3860   set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3861
3862   if (depends_on_elim)
3863     BITMAP_FREE (depends_on_elim);
3864   if (depends_on_express)
3865     BITMAP_FREE (depends_on_express);
3866
3867   return !infinite_cost_p (cost);
3868 }
3869
3870 /* Determines cost of basing replacement of USE on CAND.  Returns false
3871    if USE cannot be based on CAND.  */
3872
3873 static bool
3874 determine_use_iv_cost (struct ivopts_data *data,
3875                        struct iv_use *use, struct iv_cand *cand)
3876 {
3877   switch (use->type)
3878     {
3879     case USE_NONLINEAR_EXPR:
3880       return determine_use_iv_cost_generic (data, use, cand);
3881
3882     case USE_ADDRESS:
3883       return determine_use_iv_cost_address (data, use, cand);
3884
3885     case USE_COMPARE:
3886       return determine_use_iv_cost_condition (data, use, cand);
3887
3888     default:
3889       gcc_unreachable ();
3890     }
3891 }
3892
3893 /* Determines costs of basing the use of the iv on an iv candidate.  */
3894
3895 static void
3896 determine_use_iv_costs (struct ivopts_data *data)
3897 {
3898   unsigned i, j;
3899   struct iv_use *use;
3900   struct iv_cand *cand;
3901   bitmap to_clear = BITMAP_ALLOC (NULL);
3902
3903   alloc_use_cost_map (data);
3904
3905   for (i = 0; i < n_iv_uses (data); i++)
3906     {
3907       use = iv_use (data, i);
3908
3909       if (data->consider_all_candidates)
3910         {
3911           for (j = 0; j < n_iv_cands (data); j++)
3912             {
3913               cand = iv_cand (data, j);
3914               determine_use_iv_cost (data, use, cand);
3915             }
3916         }
3917       else
3918         {
3919           bitmap_iterator bi;
3920
3921           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3922             {
3923               cand = iv_cand (data, j);
3924               if (!determine_use_iv_cost (data, use, cand))
3925                 bitmap_set_bit (to_clear, j);
3926             }
3927
3928           /* Remove the candidates for that the cost is infinite from
3929              the list of related candidates.  */
3930           bitmap_and_compl_into (use->related_cands, to_clear);
3931           bitmap_clear (to_clear);
3932         }
3933     }
3934
3935   BITMAP_FREE (to_clear);
3936
3937   if (dump_file && (dump_flags & TDF_DETAILS))
3938     {
3939       fprintf (dump_file, "Use-candidate costs:\n");
3940
3941       for (i = 0; i < n_iv_uses (data); i++)
3942         {
3943           use = iv_use (data, i);
3944
3945           fprintf (dump_file, "Use %d:\n", i);
3946           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
3947           for (j = 0; j < use->n_map_members; j++)
3948             {
3949               if (!use->cost_map[j].cand
3950                   || infinite_cost_p (use->cost_map[j].cost))
3951                 continue;
3952
3953               fprintf (dump_file, "  %d\t%d\t%d\t",
3954                        use->cost_map[j].cand->id,
3955                        use->cost_map[j].cost.cost,
3956                        use->cost_map[j].cost.complexity);
3957               if (use->cost_map[j].depends_on)
3958                 bitmap_print (dump_file,
3959                               use->cost_map[j].depends_on, "","");
3960               fprintf (dump_file, "\n");
3961             }
3962
3963           fprintf (dump_file, "\n");
3964         }
3965       fprintf (dump_file, "\n");
3966     }
3967 }
3968
3969 /* Determines cost of the candidate CAND.  */
3970
3971 static void
3972 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3973 {
3974   comp_cost cost_base;
3975   unsigned cost, cost_step;
3976   tree base;
3977
3978   if (!cand->iv)
3979     {
3980       cand->cost = 0;
3981       return;
3982     }
3983
3984   /* There are two costs associated with the candidate -- its increment
3985      and its initialization.  The second is almost negligible for any loop
3986      that rolls enough, so we take it just very little into account.  */
3987
3988   base = cand->iv->base;
3989   cost_base = force_var_cost (data, base, NULL);
3990   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3991
3992   cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
3993
3994   /* Prefer the original ivs unless we may gain something by replacing it.
3995      The reason is to make debugging simpler; so this is not relevant for
3996      artificial ivs created by other optimization passes.  */
3997   if (cand->pos != IP_ORIGINAL
3998       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3999     cost++;
4000
4001   /* Prefer not to insert statements into latch unless there are some
4002      already (so that we do not create unnecessary jumps).  */
4003   if (cand->pos == IP_END
4004       && empty_block_p (ip_end_pos (data->current_loop)))
4005     cost++;
4006
4007   cand->cost = cost;
4008 }
4009
4010 /* Determines costs of computation of the candidates.  */
4011
4012 static void
4013 determine_iv_costs (struct ivopts_data *data)
4014 {
4015   unsigned i;
4016
4017   if (dump_file && (dump_flags & TDF_DETAILS))
4018     {
4019       fprintf (dump_file, "Candidate costs:\n");
4020       fprintf (dump_file, "  cand\tcost\n");
4021     }
4022
4023   for (i = 0; i < n_iv_cands (data); i++)
4024     {
4025       struct iv_cand *cand = iv_cand (data, i);
4026
4027       determine_iv_cost (data, cand);
4028
4029       if (dump_file && (dump_flags & TDF_DETAILS))
4030         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4031     }
4032
4033   if (dump_file && (dump_flags & TDF_DETAILS))
4034     fprintf (dump_file, "\n");
4035 }
4036
4037 /* Calculates cost for having SIZE induction variables.  */
4038
4039 static unsigned
4040 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4041 {
4042   /* We add size to the cost, so that we prefer eliminating ivs
4043      if possible.  */
4044   return size + estimate_reg_pressure_cost (size, data->regs_used);
4045 }
4046
4047 /* For each size of the induction variable set determine the penalty.  */
4048
4049 static void
4050 determine_set_costs (struct ivopts_data *data)
4051 {
4052   unsigned j, n;
4053   tree phi, op;
4054   struct loop *loop = data->current_loop;
4055   bitmap_iterator bi;
4056
4057   /* We use the following model (definitely improvable, especially the
4058      cost function -- TODO):
4059
4060      We estimate the number of registers available (using MD data), name it A.
4061
4062      We estimate the number of registers used by the loop, name it U.  This
4063      number is obtained as the number of loop phi nodes (not counting virtual
4064      registers and bivs) + the number of variables from outside of the loop.
4065
4066      We set a reserve R (free regs that are used for temporary computations,
4067      etc.).  For now the reserve is a constant 3.
4068
4069      Let I be the number of induction variables.
4070
4071      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4072         make a lot of ivs without a reason).
4073      -- if A - R < U + I <= A, the cost is I * PRES_COST
4074      -- if U + I > A, the cost is I * PRES_COST and
4075         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4076
4077   if (dump_file && (dump_flags & TDF_DETAILS))
4078     {
4079       fprintf (dump_file, "Global costs:\n");
4080       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4081       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost);
4082       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4083     }
4084
4085   n = 0;
4086   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4087     {
4088       op = PHI_RESULT (phi);
4089
4090       if (!is_gimple_reg (op))
4091         continue;
4092
4093       if (get_iv (data, op))
4094         continue;
4095
4096       n++;
4097     }
4098
4099   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4100     {
4101       struct version_info *info = ver_info (data, j);
4102
4103       if (info->inv_id && info->has_nonlin_use)
4104         n++;
4105     }
4106
4107   data->regs_used = n;
4108   if (dump_file && (dump_flags & TDF_DETAILS))
4109     fprintf (dump_file, "  regs_used %d\n", n);
4110
4111   if (dump_file && (dump_flags & TDF_DETAILS))
4112     {
4113       fprintf (dump_file, "  cost for size:\n");
4114       fprintf (dump_file, "  ivs\tcost\n");
4115       for (j = 0; j <= 2 * target_avail_regs; j++)
4116         fprintf (dump_file, "  %d\t%d\n", j,
4117                  ivopts_global_cost_for_size (data, j));
4118       fprintf (dump_file, "\n");
4119     }
4120 }
4121
4122 /* Returns true if A is a cheaper cost pair than B.  */
4123
4124 static bool
4125 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4126 {
4127   int cmp;
4128
4129   if (!a)
4130     return false;
4131
4132   if (!b)
4133     return true;
4134
4135   cmp = compare_costs (a->cost, b->cost);
4136   if (cmp < 0)
4137     return true;
4138
4139   if (cmp > 0)
4140     return false;
4141
4142   /* In case the costs are the same, prefer the cheaper candidate.  */
4143   if (a->cand->cost < b->cand->cost)
4144     return true;
4145
4146   return false;
4147 }
4148
4149 /* Computes the cost field of IVS structure.  */
4150
4151 static void
4152 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4153 {
4154   comp_cost cost = ivs->cand_use_cost;
4155   cost.cost += ivs->cand_cost;
4156   cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4157
4158   ivs->cost = cost;
4159 }
4160
4161 /* Remove invariants in set INVS to set IVS.  */
4162
4163 static void
4164 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4165 {
4166   bitmap_iterator bi;
4167   unsigned iid;
4168
4169   if (!invs)
4170     return;
4171
4172   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4173     {
4174       ivs->n_invariant_uses[iid]--;
4175       if (ivs->n_invariant_uses[iid] == 0)
4176         ivs->n_regs--;
4177     }
4178 }
4179
4180 /* Set USE not to be expressed by any candidate in IVS.  */
4181
4182 static void
4183 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4184                  struct iv_use *use)
4185 {
4186   unsigned uid = use->id, cid;
4187   struct cost_pair *cp;
4188
4189   cp = ivs->cand_for_use[uid];
4190   if (!cp)
4191     return;
4192   cid = cp->cand->id;
4193
4194   ivs->bad_uses++;
4195   ivs->cand_for_use[uid] = NULL;
4196   ivs->n_cand_uses[cid]--;
4197
4198   if (ivs->n_cand_uses[cid] == 0)
4199     {
4200       bitmap_clear_bit (ivs->cands, cid);
4201       /* Do not count the pseudocandidates.  */
4202       if (cp->cand->iv)
4203         ivs->n_regs--;
4204       ivs->n_cands--;
4205       ivs->cand_cost -= cp->cand->cost;
4206
4207       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4208     }
4209
4210   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4211
4212   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4213   iv_ca_recount_cost (data, ivs);
4214 }
4215
4216 /* Add invariants in set INVS to set IVS.  */
4217
4218 static void
4219 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4220 {
4221   bitmap_iterator bi;
4222   unsigned iid;
4223
4224   if (!invs)
4225     return;
4226
4227   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4228     {
4229       ivs->n_invariant_uses[iid]++;
4230       if (ivs->n_invariant_uses[iid] == 1)
4231         ivs->n_regs++;
4232     }
4233 }
4234
4235 /* Set cost pair for USE in set IVS to CP.  */
4236
4237 static void
4238 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4239               struct iv_use *use, struct cost_pair *cp)
4240 {
4241   unsigned uid = use->id, cid;
4242
4243   if (ivs->cand_for_use[uid] == cp)
4244     return;
4245
4246   if (ivs->cand_for_use[uid])
4247     iv_ca_set_no_cp (data, ivs, use);
4248
4249   if (cp)
4250     {
4251       cid = cp->cand->id;
4252
4253       ivs->bad_uses--;
4254       ivs->cand_for_use[uid] = cp;
4255       ivs->n_cand_uses[cid]++;
4256       if (ivs->n_cand_uses[cid] == 1)
4257         {
4258           bitmap_set_bit (ivs->cands, cid);
4259           /* Do not count the pseudocandidates.  */
4260           if (cp->cand->iv)
4261             ivs->n_regs++;
4262           ivs->n_cands++;
4263           ivs->cand_cost += cp->cand->cost;
4264
4265           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4266         }
4267
4268       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4269       iv_ca_set_add_invariants (ivs, cp->depends_on);
4270       iv_ca_recount_cost (data, ivs);
4271     }
4272 }
4273
4274 /* Extend set IVS by expressing USE by some of the candidates in it
4275    if possible.  */
4276
4277 static void
4278 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4279                struct iv_use *use)
4280 {
4281   struct cost_pair *best_cp = NULL, *cp;
4282   bitmap_iterator bi;
4283   unsigned i;
4284
4285   gcc_assert (ivs->upto >= use->id);
4286
4287   if (ivs->upto == use->id)
4288     {
4289       ivs->upto++;
4290       ivs->bad_uses++;
4291     }
4292
4293   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4294     {
4295       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4296
4297       if (cheaper_cost_pair (cp, best_cp))
4298         best_cp = cp;
4299     }
4300
4301   iv_ca_set_cp (data, ivs, use, best_cp);
4302 }
4303
4304 /* Get cost for assignment IVS.  */
4305
4306 static comp_cost
4307 iv_ca_cost (struct iv_ca *ivs)
4308 {
4309   return (ivs->bad_uses ? infinite_cost : ivs->cost);
4310 }
4311
4312 /* Returns true if all dependences of CP are among invariants in IVS.  */
4313
4314 static bool
4315 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4316 {
4317   unsigned i;
4318   bitmap_iterator bi;
4319
4320   if (!cp->depends_on)
4321     return true;
4322
4323   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4324     {
4325       if (ivs->n_invariant_uses[i] == 0)
4326         return false;
4327     }
4328
4329   return true;
4330 }
4331
4332 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4333    it before NEXT_CHANGE.  */
4334
4335 static struct iv_ca_delta *
4336 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4337                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4338 {
4339   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4340
4341   change->use = use;
4342   change->old_cp = old_cp;
4343   change->new_cp = new_cp;
4344   change->next_change = next_change;
4345
4346   return change;
4347 }
4348
4349 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4350    are rewritten.  */
4351
4352 static struct iv_ca_delta *
4353 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4354 {
4355   struct iv_ca_delta *last;
4356
4357   if (!l2)
4358     return l1;
4359
4360   if (!l1)
4361     return l2;
4362
4363   for (last = l1; last->next_change; last = last->next_change)
4364     continue;
4365   last->next_change = l2;
4366
4367   return l1;
4368 }
4369
4370 /* Returns candidate by that USE is expressed in IVS.  */
4371
4372 static struct cost_pair *
4373 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4374 {
4375   return ivs->cand_for_use[use->id];
4376 }
4377
4378 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4379
4380 static struct iv_ca_delta *
4381 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4382 {
4383   struct iv_ca_delta *act, *next, *prev = NULL;
4384   struct cost_pair *tmp;
4385
4386   for (act = delta; act; act = next)
4387     {
4388       next = act->next_change;
4389       act->next_change = prev;
4390       prev = act;
4391
4392       tmp = act->old_cp;
4393       act->old_cp = act->new_cp;
4394       act->new_cp = tmp;
4395     }
4396
4397   return prev;
4398 }
4399
4400 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4401    reverted instead.  */
4402
4403 static void
4404 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4405                     struct iv_ca_delta *delta, bool forward)
4406 {
4407   struct cost_pair *from, *to;
4408   struct iv_ca_delta *act;
4409
4410   if (!forward)
4411     delta = iv_ca_delta_reverse (delta);
4412
4413   for (act = delta; act; act = act->next_change)
4414     {
4415       from = act->old_cp;
4416       to = act->new_cp;
4417       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4418       iv_ca_set_cp (data, ivs, act->use, to);
4419     }
4420
4421   if (!forward)
4422     iv_ca_delta_reverse (delta);
4423 }
4424
4425 /* Returns true if CAND is used in IVS.  */
4426
4427 static bool
4428 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4429 {
4430   return ivs->n_cand_uses[cand->id] > 0;
4431 }
4432
4433 /* Returns number of induction variable candidates in the set IVS.  */
4434
4435 static unsigned
4436 iv_ca_n_cands (struct iv_ca *ivs)
4437 {
4438   return ivs->n_cands;
4439 }
4440
4441 /* Free the list of changes DELTA.  */
4442
4443 static void
4444 iv_ca_delta_free (struct iv_ca_delta **delta)
4445 {
4446   struct iv_ca_delta *act, *next;
4447
4448   for (act = *delta; act; act = next)
4449     {
4450       next = act->next_change;
4451       free (act);
4452     }
4453
4454   *delta = NULL;
4455 }
4456
4457 /* Allocates new iv candidates assignment.  */
4458
4459 static struct iv_ca *
4460 iv_ca_new (struct ivopts_data *data)
4461 {
4462   struct iv_ca *nw = XNEW (struct iv_ca);
4463
4464   nw->upto = 0;
4465   nw->bad_uses = 0;
4466   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4467   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4468   nw->cands = BITMAP_ALLOC (NULL);
4469   nw->n_cands = 0;
4470   nw->n_regs = 0;
4471   nw->cand_use_cost = zero_cost;
4472   nw->cand_cost = 0;
4473   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4474   nw->cost = zero_cost;
4475
4476   return nw;
4477 }
4478
4479 /* Free memory occupied by the set IVS.  */
4480
4481 static void
4482 iv_ca_free (struct iv_ca **ivs)
4483 {
4484   free ((*ivs)->cand_for_use);
4485   free ((*ivs)->n_cand_uses);
4486   BITMAP_FREE ((*ivs)->cands);
4487   free ((*ivs)->n_invariant_uses);
4488   free (*ivs);
4489   *ivs = NULL;
4490 }
4491
4492 /* Dumps IVS to FILE.  */
4493
4494 static void
4495 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4496 {
4497   const char *pref = "  invariants ";
4498   unsigned i;
4499   comp_cost cost = iv_ca_cost (ivs);
4500
4501   fprintf (file, "  cost %d (complexity %d)\n", cost.cost, cost.complexity);
4502   bitmap_print (file, ivs->cands, "  candidates ","\n");
4503
4504   for (i = 1; i <= data->max_inv_id; i++)
4505     if (ivs->n_invariant_uses[i])
4506       {
4507         fprintf (file, "%s%d", pref, i);
4508         pref = ", ";
4509       }
4510   fprintf (file, "\n");
4511 }
4512
4513 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4514    new set, and store differences in DELTA.  Number of induction variables
4515    in the new set is stored to N_IVS.  */
4516
4517 static comp_cost
4518 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4519               struct iv_cand *cand, struct iv_ca_delta **delta,
4520               unsigned *n_ivs)
4521 {
4522   unsigned i;
4523   comp_cost cost;
4524   struct iv_use *use;
4525   struct cost_pair *old_cp, *new_cp;
4526
4527   *delta = NULL;
4528   for (i = 0; i < ivs->upto; i++)
4529     {
4530       use = iv_use (data, i);
4531       old_cp = iv_ca_cand_for_use (ivs, use);
4532
4533       if (old_cp
4534           && old_cp->cand == cand)
4535         continue;
4536
4537       new_cp = get_use_iv_cost (data, use, cand);
4538       if (!new_cp)
4539         continue;
4540
4541       if (!iv_ca_has_deps (ivs, new_cp))
4542         continue;
4543
4544       if (!cheaper_cost_pair (new_cp, old_cp))
4545         continue;
4546
4547       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4548     }
4549
4550   iv_ca_delta_commit (data, ivs, *delta, true);
4551   cost = iv_ca_cost (ivs);
4552   if (n_ivs)
4553     *n_ivs = iv_ca_n_cands (ivs);
4554   iv_ca_delta_commit (data, ivs, *delta, false);
4555
4556   return cost;
4557 }
4558
4559 /* Try narrowing set IVS by removing CAND.  Return the cost of
4560    the new set and store the differences in DELTA.  */
4561
4562 static comp_cost
4563 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4564               struct iv_cand *cand, struct iv_ca_delta **delta)
4565 {
4566   unsigned i, ci;
4567   struct iv_use *use;
4568   struct cost_pair *old_cp, *new_cp, *cp;
4569   bitmap_iterator bi;
4570   struct iv_cand *cnd;
4571   comp_cost cost;
4572
4573   *delta = NULL;
4574   for (i = 0; i < n_iv_uses (data); i++)
4575     {
4576       use = iv_use (data, i);
4577
4578       old_cp = iv_ca_cand_for_use (ivs, use);
4579       if (old_cp->cand != cand)
4580         continue;
4581
4582       new_cp = NULL;
4583
4584       if (data->consider_all_candidates)
4585         {
4586           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4587             {
4588               if (ci == cand->id)
4589                 continue;
4590
4591               cnd = iv_cand (data, ci);
4592
4593               cp = get_use_iv_cost (data, use, cnd);
4594               if (!cp)
4595                 continue;
4596               if (!iv_ca_has_deps (ivs, cp))
4597                 continue;
4598
4599               if (!cheaper_cost_pair (cp, new_cp))
4600                 continue;
4601
4602               new_cp = cp;
4603             }
4604         }
4605       else
4606         {
4607           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4608             {
4609               if (ci == cand->id)
4610                 continue;
4611
4612               cnd = iv_cand (data, ci);
4613
4614               cp = get_use_iv_cost (data, use, cnd);
4615               if (!cp)
4616                 continue;
4617               if (!iv_ca_has_deps (ivs, cp))
4618                 continue;
4619
4620               if (!cheaper_cost_pair (cp, new_cp))
4621                 continue;
4622
4623               new_cp = cp;
4624             }
4625         }
4626
4627       if (!new_cp)
4628         {
4629           iv_ca_delta_free (delta);
4630           return infinite_cost;
4631         }
4632
4633       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4634     }
4635
4636   iv_ca_delta_commit (data, ivs, *delta, true);
4637   cost = iv_ca_cost (ivs);
4638   iv_ca_delta_commit (data, ivs, *delta, false);
4639
4640   return cost;
4641 }
4642
4643 /* Try optimizing the set of candidates IVS by removing candidates different
4644    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4645    differences in DELTA.  */
4646
4647 static comp_cost
4648 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4649              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4650 {
4651   bitmap_iterator bi;
4652   struct iv_ca_delta *act_delta, *best_delta;
4653   unsigned i;
4654   comp_cost best_cost, acost;
4655   struct iv_cand *cand;
4656
4657   best_delta = NULL;
4658   best_cost = iv_ca_cost (ivs);
4659
4660   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4661     {
4662       cand = iv_cand (data, i);
4663
4664       if (cand == except_cand)
4665         continue;
4666
4667       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4668
4669       if (compare_costs (acost, best_cost) < 0)
4670         {
4671           best_cost = acost;
4672           iv_ca_delta_free (&best_delta);
4673           best_delta = act_delta;
4674         }
4675       else
4676         iv_ca_delta_free (&act_delta);
4677     }
4678
4679   if (!best_delta)
4680     {
4681       *delta = NULL;
4682       return best_cost;
4683     }
4684
4685   /* Recurse to possibly remove other unnecessary ivs.  */
4686   iv_ca_delta_commit (data, ivs, best_delta, true);
4687   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4688   iv_ca_delta_commit (data, ivs, best_delta, false);
4689   *delta = iv_ca_delta_join (best_delta, *delta);
4690   return best_cost;
4691 }
4692
4693 /* Tries to extend the sets IVS in the best possible way in order
4694    to express the USE.  */
4695
4696 static bool
4697 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4698                   struct iv_use *use)
4699 {
4700   comp_cost best_cost, act_cost;
4701   unsigned i;
4702   bitmap_iterator bi;
4703   struct iv_cand *cand;
4704   struct iv_ca_delta *best_delta = NULL, *act_delta;
4705   struct cost_pair *cp;
4706
4707   iv_ca_add_use (data, ivs, use);
4708   best_cost = iv_ca_cost (ivs);
4709
4710   cp = iv_ca_cand_for_use (ivs, use);
4711   if (cp)
4712     {
4713       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4714       iv_ca_set_no_cp (data, ivs, use);
4715     }
4716
4717   /* First try important candidates not based on any memory object.  Only if
4718      this fails, try the specific ones.  Rationale -- in loops with many
4719      variables the best choice often is to use just one generic biv.  If we
4720      added here many ivs specific to the uses, the optimization algorithm later
4721      would be likely to get stuck in a local minimum, thus causing us to create
4722      too many ivs.  The approach from few ivs to more seems more likely to be
4723      successful -- starting from few ivs, replacing an expensive use by a
4724      specific iv should always be a win.  */
4725   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4726     {
4727       cand = iv_cand (data, i);
4728
4729       if (cand->iv->base_object != NULL_TREE)
4730         continue;
4731
4732       if (iv_ca_cand_used_p (ivs, cand))
4733         continue;
4734
4735       cp = get_use_iv_cost (data, use, cand);
4736       if (!cp)
4737         continue;
4738
4739       iv_ca_set_cp (data, ivs, use, cp);
4740       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4741       iv_ca_set_no_cp (data, ivs, use);
4742       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4743
4744       if (compare_costs (act_cost, best_cost) < 0)
4745         {
4746           best_cost = act_cost;
4747
4748           iv_ca_delta_free (&best_delta);
4749           best_delta = act_delta;
4750         }
4751       else
4752         iv_ca_delta_free (&act_delta);
4753     }
4754
4755   if (infinite_cost_p (best_cost))
4756     {
4757       for (i = 0; i < use->n_map_members; i++)
4758         {
4759           cp = use->cost_map + i;
4760           cand = cp->cand;
4761           if (!cand)
4762             continue;
4763
4764           /* Already tried this.  */
4765           if (cand->important && cand->iv->base_object == NULL_TREE)
4766             continue;
4767
4768           if (iv_ca_cand_used_p (ivs, cand))
4769             continue;
4770
4771           act_delta = NULL;
4772           iv_ca_set_cp (data, ivs, use, cp);
4773           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4774           iv_ca_set_no_cp (data, ivs, use);
4775           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4776                                        cp, act_delta);
4777
4778           if (compare_costs (act_cost, best_cost) < 0)
4779             {
4780               best_cost = act_cost;
4781
4782               if (best_delta)
4783                 iv_ca_delta_free (&best_delta);
4784               best_delta = act_delta;
4785             }
4786           else
4787             iv_ca_delta_free (&act_delta);
4788         }
4789     }
4790
4791   iv_ca_delta_commit (data, ivs, best_delta, true);
4792   iv_ca_delta_free (&best_delta);
4793
4794   return !infinite_cost_p (best_cost);
4795 }
4796
4797 /* Finds an initial assignment of candidates to uses.  */
4798
4799 static struct iv_ca *
4800 get_initial_solution (struct ivopts_data *data)
4801 {
4802   struct iv_ca *ivs = iv_ca_new (data);
4803   unsigned i;
4804
4805   for (i = 0; i < n_iv_uses (data); i++)
4806     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4807       {
4808         iv_ca_free (&ivs);
4809         return NULL;
4810       }
4811
4812   return ivs;
4813 }
4814
4815 /* Tries to improve set of induction variables IVS.  */
4816
4817 static bool
4818 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4819 {
4820   unsigned i, n_ivs;
4821   comp_cost acost, best_cost = iv_ca_cost (ivs);
4822   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4823   struct iv_cand *cand;
4824
4825   /* Try extending the set of induction variables by one.  */
4826   for (i = 0; i < n_iv_cands (data); i++)
4827     {
4828       cand = iv_cand (data, i);
4829
4830       if (iv_ca_cand_used_p (ivs, cand))
4831         continue;
4832
4833       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4834       if (!act_delta)
4835         continue;
4836
4837       /* If we successfully added the candidate and the set is small enough,
4838          try optimizing it by removing other candidates.  */
4839       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4840         {
4841           iv_ca_delta_commit (data, ivs, act_delta, true);
4842           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4843           iv_ca_delta_commit (data, ivs, act_delta, false);
4844           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4845         }
4846
4847       if (compare_costs (acost, best_cost) < 0)
4848         {
4849           best_cost = acost;
4850           iv_ca_delta_free (&best_delta);
4851           best_delta = act_delta;
4852         }
4853       else
4854         iv_ca_delta_free (&act_delta);
4855     }
4856
4857   if (!best_delta)
4858     {
4859       /* Try removing the candidates from the set instead.  */
4860       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4861
4862       /* Nothing more we can do.  */
4863       if (!best_delta)
4864         return false;
4865     }
4866
4867   iv_ca_delta_commit (data, ivs, best_delta, true);
4868   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4869   iv_ca_delta_free (&best_delta);
4870   return true;
4871 }
4872
4873 /* Attempts to find the optimal set of induction variables.  We do simple
4874    greedy heuristic -- we try to replace at most one candidate in the selected
4875    solution and remove the unused ivs while this improves the cost.  */
4876
4877 static struct iv_ca *
4878 find_optimal_iv_set (struct ivopts_data *data)
4879 {
4880   unsigned i;
4881   struct iv_ca *set;
4882   struct iv_use *use;
4883
4884   /* Get the initial solution.  */
4885   set = get_initial_solution (data);
4886   if (!set)
4887     {
4888       if (dump_file && (dump_flags & TDF_DETAILS))
4889         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4890       return NULL;
4891     }
4892
4893   if (dump_file && (dump_flags & TDF_DETAILS))
4894     {
4895       fprintf (dump_file, "Initial set of candidates:\n");
4896       iv_ca_dump (data, dump_file, set);
4897     }
4898
4899   while (try_improve_iv_set (data, set))
4900     {
4901       if (dump_file && (dump_flags & TDF_DETAILS))
4902         {
4903           fprintf (dump_file, "Improved to:\n");
4904           iv_ca_dump (data, dump_file, set);
4905         }
4906     }
4907
4908   if (dump_file && (dump_flags & TDF_DETAILS))
4909     {
4910       comp_cost cost = iv_ca_cost (set);
4911       fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
4912     }
4913
4914   for (i = 0; i < n_iv_uses (data); i++)
4915     {
4916       use = iv_use (data, i);
4917       use->selected = iv_ca_cand_for_use (set, use)->cand;
4918     }
4919
4920   return set;
4921 }
4922
4923 /* Creates a new induction variable corresponding to CAND.  */
4924
4925 static void
4926 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4927 {
4928   block_stmt_iterator incr_pos;
4929   tree base;
4930   bool after = false;
4931
4932   if (!cand->iv)
4933     return;
4934
4935   switch (cand->pos)
4936     {
4937     case IP_NORMAL:
4938       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4939       break;
4940
4941     case IP_END:
4942       incr_pos = bsi_last (ip_end_pos (data->current_loop));
4943       after = true;
4944       break;
4945
4946     case IP_ORIGINAL:
4947       /* Mark that the iv is preserved.  */
4948       name_info (data, cand->var_before)->preserve_biv = true;
4949       name_info (data, cand->var_after)->preserve_biv = true;
4950
4951       /* Rewrite the increment so that it uses var_before directly.  */
4952       find_interesting_uses_op (data, cand->var_after)->selected = cand;
4953
4954       return;
4955     }
4956
4957   gimple_add_tmp_var (cand->var_before);
4958   add_referenced_var (cand->var_before);
4959
4960   base = unshare_expr (cand->iv->base);
4961
4962   create_iv (base, unshare_expr (cand->iv->step),
4963              cand->var_before, data->current_loop,
4964              &incr_pos, after, &cand->var_before, &cand->var_after);
4965 }
4966
4967 /* Creates new induction variables described in SET.  */
4968
4969 static void
4970 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4971 {
4972   unsigned i;
4973   struct iv_cand *cand;
4974   bitmap_iterator bi;
4975
4976   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4977     {
4978       cand = iv_cand (data, i);
4979       create_new_iv (data, cand);
4980     }
4981 }
4982
4983 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
4984    is true, remove also the ssa name defined by the statement.  */
4985
4986 static void
4987 remove_statement (tree stmt, bool including_defined_name)
4988 {
4989   if (TREE_CODE (stmt) == PHI_NODE)
4990     {
4991       remove_phi_node (stmt, NULL_TREE, including_defined_name);
4992     }
4993   else
4994     {
4995       block_stmt_iterator bsi = bsi_for_stmt (stmt);
4996
4997       bsi_remove (&bsi, true);
4998       release_defs (stmt);
4999     }
5000 }
5001
5002 /* Rewrites USE (definition of iv used in a nonlinear expression)
5003    using candidate CAND.  */
5004
5005 static void
5006 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5007                             struct iv_use *use, struct iv_cand *cand)
5008 {
5009   tree comp;
5010   tree op, tgt, ass;
5011   block_stmt_iterator bsi;
5012
5013   /* An important special case -- if we are asked to express value of
5014      the original iv by itself, just exit; there is no need to
5015      introduce a new computation (that might also need casting the
5016      variable to unsigned and back).  */
5017   if (cand->pos == IP_ORIGINAL
5018       && cand->incremented_at == use->stmt)
5019     {
5020       tree step, ctype, utype;
5021       enum tree_code incr_code = PLUS_EXPR;
5022
5023       gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
5024       gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
5025
5026       step = cand->iv->step;
5027       ctype = TREE_TYPE (step);
5028       utype = TREE_TYPE (cand->var_after);
5029       if (TREE_CODE (step) == NEGATE_EXPR)
5030         {
5031           incr_code = MINUS_EXPR;
5032           step = TREE_OPERAND (step, 0);
5033         }
5034
5035       /* Check whether we may leave the computation unchanged.
5036          This is the case only if it does not rely on other
5037          computations in the loop -- otherwise, the computation
5038          we rely upon may be removed in remove_unused_ivs,
5039          thus leading to ICE.  */
5040       op = GIMPLE_STMT_OPERAND (use->stmt, 1);
5041       if (TREE_CODE (op) == PLUS_EXPR
5042           || TREE_CODE (op) == MINUS_EXPR
5043           || TREE_CODE (op) == POINTER_PLUS_EXPR)
5044         {
5045           if (TREE_OPERAND (op, 0) == cand->var_before)
5046             op = TREE_OPERAND (op, 1);
5047           else if (TREE_CODE (op) != MINUS_EXPR
5048                    && TREE_OPERAND (op, 1) == cand->var_before)
5049             op = TREE_OPERAND (op, 0);
5050           else
5051             op = NULL_TREE;
5052         }
5053       else
5054         op = NULL_TREE;
5055
5056       if (op
5057           && (TREE_CODE (op) == INTEGER_CST
5058               || operand_equal_p (op, step, 0)))
5059         return;
5060
5061       /* Otherwise, add the necessary computations to express
5062          the iv.  */
5063       op = fold_convert (ctype, cand->var_before);
5064       comp = fold_convert (utype,
5065                            build2 (incr_code, ctype, op,
5066                                    unshare_expr (step)));
5067     }
5068   else
5069     {
5070       comp = get_computation (data->current_loop, use, cand);
5071       gcc_assert (comp != NULL_TREE);
5072     }
5073
5074   switch (TREE_CODE (use->stmt))
5075     {
5076     case PHI_NODE:
5077       tgt = PHI_RESULT (use->stmt);
5078
5079       /* If we should keep the biv, do not replace it.  */
5080       if (name_info (data, tgt)->preserve_biv)
5081         return;
5082
5083       bsi = bsi_after_labels (bb_for_stmt (use->stmt));
5084       break;
5085
5086     case GIMPLE_MODIFY_STMT:
5087       tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
5088       bsi = bsi_for_stmt (use->stmt);
5089       break;
5090
5091     default:
5092       gcc_unreachable ();
5093     }
5094
5095   op = force_gimple_operand_bsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5096                                  true, BSI_SAME_STMT);
5097
5098   if (TREE_CODE (use->stmt) == PHI_NODE)
5099     {
5100       ass = build_gimple_modify_stmt (tgt, op);
5101       bsi_insert_before (&bsi, ass, BSI_SAME_STMT);
5102       remove_statement (use->stmt, false);
5103       SSA_NAME_DEF_STMT (tgt) = ass;
5104     }
5105   else
5106     GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
5107 }
5108
5109 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5110    for_each_index.  */
5111
5112 static bool
5113 idx_remove_ssa_names (tree base, tree *idx,
5114                       void *data ATTRIBUTE_UNUSED)
5115 {
5116   tree *op;
5117
5118   if (TREE_CODE (*idx) == SSA_NAME)
5119     *idx = SSA_NAME_VAR (*idx);
5120
5121   if (TREE_CODE (base) == ARRAY_REF)
5122     {
5123       op = &TREE_OPERAND (base, 2);
5124       if (*op
5125           && TREE_CODE (*op) == SSA_NAME)
5126         *op = SSA_NAME_VAR (*op);
5127       op = &TREE_OPERAND (base, 3);
5128       if (*op
5129           && TREE_CODE (*op) == SSA_NAME)
5130         *op = SSA_NAME_VAR (*op);
5131     }
5132
5133   return true;
5134 }
5135
5136 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5137
5138 static tree
5139 unshare_and_remove_ssa_names (tree ref)
5140 {
5141   ref = unshare_expr (ref);
5142   for_each_index (&ref, idx_remove_ssa_names, NULL);
5143
5144   return ref;
5145 }
5146
5147 /* Extract the alias analysis info for the memory reference REF.  There are
5148    several ways how this information may be stored and what precisely is
5149    its semantics depending on the type of the reference, but there always is
5150    somewhere hidden one _DECL node that is used to determine the set of
5151    virtual operands for the reference.  The code below deciphers this jungle
5152    and extracts this single useful piece of information.  */
5153
5154 static tree
5155 get_ref_tag (tree ref, tree orig)
5156 {
5157   tree var = get_base_address (ref);
5158   tree aref = NULL_TREE, tag, sv;
5159   HOST_WIDE_INT offset, size, maxsize;
5160
5161   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5162     {
5163       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5164       if (ref)
5165         break;
5166     }
5167
5168   if (!var)
5169     return NULL_TREE;
5170
5171   if (TREE_CODE (var) == INDIRECT_REF)
5172     {
5173       /* If the base is a dereference of a pointer, first check its name memory
5174          tag.  If it does not have one, use its symbol memory tag.  */
5175       var = TREE_OPERAND (var, 0);
5176       if (TREE_CODE (var) != SSA_NAME)
5177         return NULL_TREE;
5178
5179       if (SSA_NAME_PTR_INFO (var))
5180         {
5181           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5182           if (tag)
5183             return tag;
5184         }
5185
5186       var = SSA_NAME_VAR (var);
5187       tag = symbol_mem_tag (var);
5188       gcc_assert (tag != NULL_TREE);
5189       return tag;
5190     }
5191   else
5192     {
5193       if (!DECL_P (var))
5194         return NULL_TREE;
5195
5196       tag = symbol_mem_tag (var);
5197       if (tag)
5198         return tag;
5199
5200       return var;
5201     }
5202 }
5203
5204 /* Copies the reference information from OLD_REF to NEW_REF.  */
5205
5206 static void
5207 copy_ref_info (tree new_ref, tree old_ref)
5208 {
5209   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5210     copy_mem_ref_info (new_ref, old_ref);
5211   else
5212     {
5213       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5214       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5215     }
5216 }
5217
5218 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5219
5220 static void
5221 rewrite_use_address (struct ivopts_data *data,
5222                      struct iv_use *use, struct iv_cand *cand)
5223 {
5224   aff_tree aff;
5225   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5226   tree ref;
5227   bool ok;
5228
5229   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5230   gcc_assert (ok);
5231   unshare_aff_combination (&aff);
5232
5233   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5234   copy_ref_info (ref, *use->op_p);
5235   *use->op_p = ref;
5236 }
5237
5238 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5239    candidate CAND.  */
5240
5241 static void
5242 rewrite_use_compare (struct ivopts_data *data,
5243                      struct iv_use *use, struct iv_cand *cand)
5244 {
5245   tree comp, *var_p, op, bound;
5246   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5247   enum tree_code compare;
5248   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5249   bool ok;
5250
5251   bound = cp->value;
5252   if (bound)
5253     {
5254       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5255       tree var_type = TREE_TYPE (var);
5256
5257       compare = iv_elimination_compare (data, use);
5258       bound = unshare_expr (fold_convert (var_type, bound));
5259       op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE,
5260                                      true, BSI_SAME_STMT);
5261
5262       *use->op_p = build2 (compare, boolean_type_node, var, op);
5263       return;
5264     }
5265
5266   /* The induction variable elimination failed; just express the original
5267      giv.  */
5268   comp = get_computation (data->current_loop, use, cand);
5269   gcc_assert (comp != NULL_TREE);
5270
5271   ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5272   gcc_assert (ok);
5273
5274   *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5275                                      true, BSI_SAME_STMT);
5276 }
5277
5278 /* Rewrites USE using candidate CAND.  */
5279
5280 static void
5281 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5282 {
5283   push_stmt_changes (&use->stmt);
5284
5285   switch (use->type)
5286     {
5287       case USE_NONLINEAR_EXPR:
5288         rewrite_use_nonlinear_expr (data, use, cand);
5289         break;
5290
5291       case USE_ADDRESS:
5292         rewrite_use_address (data, use, cand);
5293         break;
5294
5295       case USE_COMPARE:
5296         rewrite_use_compare (data, use, cand);
5297         break;
5298
5299       default:
5300         gcc_unreachable ();
5301     }
5302
5303   pop_stmt_changes (&use->stmt);
5304 }
5305
5306 /* Rewrite the uses using the selected induction variables.  */
5307
5308 static void
5309 rewrite_uses (struct ivopts_data *data)
5310 {
5311   unsigned i;
5312   struct iv_cand *cand;
5313   struct iv_use *use;
5314
5315   for (i = 0; i < n_iv_uses (data); i++)
5316     {
5317       use = iv_use (data, i);
5318       cand = use->selected;
5319       gcc_assert (cand);
5320
5321       rewrite_use (data, use, cand);
5322     }
5323 }
5324
5325 /* Removes the ivs that are not used after rewriting.  */
5326
5327 static void
5328 remove_unused_ivs (struct ivopts_data *data)
5329 {
5330   unsigned j;
5331   bitmap_iterator bi;
5332
5333   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5334     {
5335       struct version_info *info;
5336
5337       info = ver_info (data, j);
5338       if (info->iv
5339           && !integer_zerop (info->iv->step)
5340           && !info->inv_id
5341           && !info->iv->have_use_for
5342           && !info->preserve_biv)
5343         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5344     }
5345 }
5346
5347 /* Frees data allocated by the optimization of a single loop.  */
5348
5349 static void
5350 free_loop_data (struct ivopts_data *data)
5351 {
5352   unsigned i, j;
5353   bitmap_iterator bi;
5354   tree obj;
5355
5356   if (data->niters)
5357     {
5358       pointer_map_destroy (data->niters);
5359       data->niters = NULL;
5360     }
5361
5362   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5363     {
5364       struct version_info *info;
5365
5366       info = ver_info (data, i);
5367       if (info->iv)
5368         free (info->iv);
5369       info->iv = NULL;
5370       info->has_nonlin_use = false;
5371       info->preserve_biv = false;
5372       info->inv_id = 0;
5373     }
5374   bitmap_clear (data->relevant);
5375   bitmap_clear (data->important_candidates);
5376
5377   for (i = 0; i < n_iv_uses (data); i++)
5378     {
5379       struct iv_use *use = iv_use (data, i);
5380
5381       free (use->iv);
5382       BITMAP_FREE (use->related_cands);
5383       for (j = 0; j < use->n_map_members; j++)
5384         if (use->cost_map[j].depends_on)
5385           BITMAP_FREE (use->cost_map[j].depends_on);
5386       free (use->cost_map);
5387       free (use);
5388     }
5389   VEC_truncate (iv_use_p, data->iv_uses, 0);
5390
5391   for (i = 0; i < n_iv_cands (data); i++)
5392     {
5393       struct iv_cand *cand = iv_cand (data, i);
5394
5395       if (cand->iv)
5396         free (cand->iv);
5397       if (cand->depends_on)
5398         BITMAP_FREE (cand->depends_on);
5399       free (cand);
5400     }
5401   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5402
5403   if (data->version_info_size < num_ssa_names)
5404     {
5405       data->version_info_size = 2 * num_ssa_names;
5406       free (data->version_info);
5407       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5408     }
5409
5410   data->max_inv_id = 0;
5411
5412   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5413     SET_DECL_RTL (obj, NULL_RTX);
5414
5415   VEC_truncate (tree, decl_rtl_to_reset, 0);
5416 }
5417
5418 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5419    loop tree.  */
5420
5421 static void
5422 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5423 {
5424   free_loop_data (data);
5425   free (data->version_info);
5426   BITMAP_FREE (data->relevant);
5427   BITMAP_FREE (data->important_candidates);
5428
5429   VEC_free (tree, heap, decl_rtl_to_reset);
5430   VEC_free (iv_use_p, heap, data->iv_uses);
5431   VEC_free (iv_cand_p, heap, data->iv_candidates);
5432 }
5433
5434 /* Optimizes the LOOP.  Returns true if anything changed.  */
5435
5436 static bool
5437 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5438 {
5439   bool changed = false;
5440   struct iv_ca *iv_ca;
5441   edge exit;
5442
5443   gcc_assert (!data->niters);
5444   data->current_loop = loop;
5445
5446   if (dump_file && (dump_flags & TDF_DETAILS))
5447     {
5448       fprintf (dump_file, "Processing loop %d\n", loop->num);
5449
5450       exit = single_dom_exit (loop);
5451       if (exit)
5452         {
5453           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5454                    exit->src->index, exit->dest->index);
5455           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5456           fprintf (dump_file, "\n");
5457         }
5458
5459       fprintf (dump_file, "\n");
5460     }
5461
5462   /* For each ssa name determines whether it behaves as an induction variable
5463      in some loop.  */
5464   if (!find_induction_variables (data))
5465     goto finish;
5466
5467   /* Finds interesting uses (item 1).  */
5468   find_interesting_uses (data);
5469   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5470     goto finish;
5471
5472   /* Finds candidates for the induction variables (item 2).  */
5473   find_iv_candidates (data);
5474
5475   /* Calculates the costs (item 3, part 1).  */
5476   determine_use_iv_costs (data);
5477   determine_iv_costs (data);
5478   determine_set_costs (data);
5479
5480   /* Find the optimal set of induction variables (item 3, part 2).  */
5481   iv_ca = find_optimal_iv_set (data);
5482   if (!iv_ca)
5483     goto finish;
5484   changed = true;
5485
5486   /* Create the new induction variables (item 4, part 1).  */
5487   create_new_ivs (data, iv_ca);
5488   iv_ca_free (&iv_ca);
5489
5490   /* Rewrite the uses (item 4, part 2).  */
5491   rewrite_uses (data);
5492
5493   /* Remove the ivs that are unused after rewriting.  */
5494   remove_unused_ivs (data);
5495
5496   /* We have changed the structure of induction variables; it might happen
5497      that definitions in the scev database refer to some of them that were
5498      eliminated.  */
5499   scev_reset ();
5500
5501 finish:
5502   free_loop_data (data);
5503
5504   return changed;
5505 }
5506
5507 /* Main entry point.  Optimizes induction variables in loops.  */
5508
5509 void
5510 tree_ssa_iv_optimize (void)
5511 {
5512   struct loop *loop;
5513   struct ivopts_data data;
5514   loop_iterator li;
5515
5516   tree_ssa_iv_optimize_init (&data);
5517
5518   /* Optimize the loops starting with the innermost ones.  */
5519   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5520     {
5521       if (dump_file && (dump_flags & TDF_DETAILS))
5522         flow_loop_dump (loop, dump_file, NULL, 1);
5523
5524       tree_ssa_iv_optimize_loop (&data, loop);
5525     }
5526
5527   tree_ssa_iv_optimize_finalize (&data);
5528 }